From 75059c46d645e42e8da31fb97d003047c67b004b Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 9 Jul 2021 13:59:09 -0400
Subject: [PATCH 001/770] thread_worker: Fix compile time error

state is unused in the branch where with_state is false
---
 src/common/thread_worker.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
index 8272985ff..0a975a869 100644
--- a/src/common/thread_worker.h
+++ b/src/common/thread_worker.h
@@ -39,7 +39,7 @@ public:
         const auto lambda = [this, func](std::stop_token stop_token) {
             Common::SetCurrentThreadName(thread_name.c_str());
             {
-                std::conditional_t<with_state, StateType, int> state{func()};
+                [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()};
                 while (!stop_token.stop_requested()) {
                     Task task;
                     {

From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 9 Jan 2021 03:30:07 -0300
Subject: [PATCH 002/770] shader: Initial recompiler work
---
 src/CMakeLists.txt | 1 +
 src/shader_recompiler/CMakeLists.txt | 86 ++
 src/shader_recompiler/environment.h | 14 +
 src/shader_recompiler/exception.h | 42 +
 src/shader_recompiler/file_environment.cpp | 42 +
 src/shader_recompiler/file_environment.h | 21 +
 .../frontend/ir/attribute.cpp | 447 +++++++
 src/shader_recompiler/frontend/ir/attribute.h | 242 ++++
 .../frontend/ir/basic_block.cpp | 142 +++
 .../frontend/ir/basic_block.h | 134 ++
 .../frontend/ir/condition.cpp | 31 +
 src/shader_recompiler/frontend/ir/condition.h | 60 +
 .../frontend/ir/flow_test.cpp | 83 ++
 src/shader_recompiler/frontend/ir/flow_test.h | 61 +
 .../frontend/ir/ir_emitter.cpp | 533 ++++++++
 .../frontend/ir/ir_emitter.h | 123 ++
 .../frontend/ir/microinstruction.cpp | 189 +++
 .../frontend/ir/microinstruction.h | 82 ++
 src/shader_recompiler/frontend/ir/opcode.cpp | 67 +
 src/shader_recompiler/frontend/ir/opcode.h | 44 +
 src/shader_recompiler/frontend/ir/opcode.inc | 142 +++
 src/shader_recompiler/frontend/ir/pred.h | 28 +
 src/shader_recompiler/frontend/ir/reg.h | 314 +++++
 src/shader_recompiler/frontend/ir/type.cpp | 36 +
 src/shader_recompiler/frontend/ir/type.h | 47 +
 src/shader_recompiler/frontend/ir/value.cpp | 124 ++
 src/shader_recompiler/frontend/ir/value.h | 98 ++
 .../frontend/maxwell/control_flow.cpp | 531 ++++++++
 .../frontend/maxwell/control_flow.h | 137 ++
 .../frontend/maxwell/decode.cpp | 149 +++
 .../frontend/maxwell/decode.h | 14 +
 .../frontend/maxwell/instruction.h | 62 +
 .../frontend/maxwell/location.h | 106 ++
 .../frontend/maxwell/maxwell.inc | 285 +++++
 .../frontend/maxwell/opcode.cpp | 26 +
 .../frontend/maxwell/opcode.h | 30 +
 .../frontend/maxwell/program.cpp | 69 +
 .../frontend/maxwell/program.h | 39 +
 .../frontend/maxwell/termination_code.cpp | 79 ++
 .../frontend/maxwell/termination_code.h | 16 +
 .../frontend/maxwell/translate/impl/exit.cpp | 15 +
 .../floating_point_conversion_integer.cpp | 133 ++
 .../impl/floating_point_multi_function.cpp | 71 ++
 .../frontend/maxwell/translate/impl/impl.cpp | 79 ++
 .../frontend/maxwell/translate/impl/impl.h | 316 +++++
 .../translate/impl/load_store_attribute.cpp | 92 ++
 .../translate/impl/load_store_memory.cpp | 90 ++
 .../translate/impl/not_implemented.cpp | 1105 +++++++++++++++++
 .../maxwell/translate/impl/register_move.cpp | 45 +
 .../frontend/maxwell/translate/translate.cpp | 50 +
 .../frontend/maxwell/translate/translate.h | 16 +
 .../ir_opt/dead_code_elimination_pass.cpp | 23 +
 .../ir_opt/get_set_elimination_pass.cpp | 87 ++
 .../ir_opt/identity_removal_pass.cpp | 37 +
src/shader_recompiler/ir_opt/passes.h | 16 + .../ir_opt/verification_pass.cpp | 50 + src/shader_recompiler/main.cpp | 60 + 57 files changed, 7061 insertions(+) create mode 100644 src/shader_recompiler/CMakeLists.txt create mode 100644 src/shader_recompiler/environment.h create mode 100644 src/shader_recompiler/exception.h create mode 100644 src/shader_recompiler/file_environment.cpp create mode 100644 src/shader_recompiler/file_environment.h create mode 100644 src/shader_recompiler/frontend/ir/attribute.cpp create mode 100644 src/shader_recompiler/frontend/ir/attribute.h create mode 100644 src/shader_recompiler/frontend/ir/basic_block.cpp create mode 100644 src/shader_recompiler/frontend/ir/basic_block.h create mode 100644 src/shader_recompiler/frontend/ir/condition.cpp create mode 100644 src/shader_recompiler/frontend/ir/condition.h create mode 100644 src/shader_recompiler/frontend/ir/flow_test.cpp create mode 100644 src/shader_recompiler/frontend/ir/flow_test.h create mode 100644 src/shader_recompiler/frontend/ir/ir_emitter.cpp create mode 100644 src/shader_recompiler/frontend/ir/ir_emitter.h create mode 100644 src/shader_recompiler/frontend/ir/microinstruction.cpp create mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h create mode 100644 src/shader_recompiler/frontend/ir/opcode.cpp create mode 100644 src/shader_recompiler/frontend/ir/opcode.h create mode 100644 src/shader_recompiler/frontend/ir/opcode.inc create mode 100644 src/shader_recompiler/frontend/ir/pred.h create mode 100644 src/shader_recompiler/frontend/ir/reg.h create mode 100644 src/shader_recompiler/frontend/ir/type.cpp create mode 100644 src/shader_recompiler/frontend/ir/type.h create mode 100644 src/shader_recompiler/frontend/ir/value.cpp create mode 100644 src/shader_recompiler/frontend/ir/value.h create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.h create mode 100644 src/shader_recompiler/frontend/maxwell/decode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/decode.h create mode 100644 src/shader_recompiler/frontend/maxwell/instruction.h create mode 100644 src/shader_recompiler/frontend/maxwell/location.h create mode 100644 src/shader_recompiler/frontend/maxwell/maxwell.inc create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp create mode 100644 
src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.h create mode 100644 src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp create mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp create mode 100644 src/shader_recompiler/ir_opt/identity_removal_pass.cpp create mode 100644 src/shader_recompiler/ir_opt/passes.h create mode 100644 src/shader_recompiler/ir_opt/verification_pass.cpp create mode 100644 src/shader_recompiler/main.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8ec8fea8..6e66dc1df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -142,6 +142,7 @@ add_subdirectory(core) add_subdirectory(audio_core) add_subdirectory(video_core) add_subdirectory(input_common) +add_subdirectory(shader_recompiler) add_subdirectory(tests) if (ENABLE_SDL2) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt new file mode 100644 index 000000000..c65846bc4 --- /dev/null +++ b/src/shader_recompiler/CMakeLists.txt @@ -0,0 +1,86 @@ +add_executable(shader_recompiler + environment.h + exception.h + file_environment.cpp + file_environment.h + frontend/ir/attribute.cpp + frontend/ir/attribute.h + frontend/ir/basic_block.cpp + frontend/ir/basic_block.h + frontend/ir/condition.cpp + frontend/ir/condition.h + frontend/ir/flow_test.cpp + frontend/ir/flow_test.h + frontend/ir/ir_emitter.cpp + frontend/ir/ir_emitter.h + frontend/ir/microinstruction.cpp + frontend/ir/microinstruction.h + frontend/ir/opcode.cpp + frontend/ir/opcode.h + frontend/ir/opcode.inc + frontend/ir/pred.h + frontend/ir/reg.h + frontend/ir/type.cpp + frontend/ir/type.h + frontend/ir/value.cpp + frontend/ir/value.h + frontend/maxwell/control_flow.cpp + frontend/maxwell/control_flow.h + frontend/maxwell/decode.cpp + frontend/maxwell/decode.h + frontend/maxwell/instruction.h + frontend/maxwell/location.h + frontend/maxwell/maxwell.inc + frontend/maxwell/opcode.cpp + frontend/maxwell/opcode.h + frontend/maxwell/program.cpp + frontend/maxwell/program.h + frontend/maxwell/termination_code.cpp + frontend/maxwell/termination_code.h + frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp + frontend/maxwell/translate/impl/floating_point_multi_function.cpp + frontend/maxwell/translate/impl/impl.cpp + frontend/maxwell/translate/impl/impl.h + frontend/maxwell/translate/impl/load_store_attribute.cpp + frontend/maxwell/translate/impl/load_store_memory.cpp + frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/register_move.cpp + frontend/maxwell/translate/translate.cpp + frontend/maxwell/translate/translate.h + ir_opt/dead_code_elimination_pass.cpp + ir_opt/get_set_elimination_pass.cpp + ir_opt/identity_removal_pass.cpp + ir_opt/passes.h + ir_opt/verification_pass.cpp + main.cpp +) +target_link_libraries(shader_recompiler PRIVATE fmt::fmt) + +if (MSVC) + target_compile_options(shader_recompiler PRIVATE + /W4 + /WX + /we4018 # 'expression' : signed/unsigned mismatch + /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) + /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data + /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4305 # 
'context' : truncation from 'type1' to 'type2'
+        /we4800 # Implicit conversion from 'type' to bool. Possible information loss
+        /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior.
+    )
+else()
+    target_compile_options(shader_recompiler PRIVATE
+        -Werror
+        -Werror=conversion
+        -Werror=ignored-qualifiers
+        -Werror=implicit-fallthrough
+        -Werror=shadow
+        -Werror=sign-compare
+        $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
+        $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
+        -Werror=unused-variable
+    )
+endif()
+
+create_target_directory_groups(shader_recompiler)
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
new file mode 100644
index 000000000..f6230e817
--- /dev/null
+++ b/src/shader_recompiler/environment.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Shader {
+
+class Environment {
+public:
+    virtual ~Environment() = default;
+
+    [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0;
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
new file mode 100644
index 000000000..6fe620801
--- /dev/null
+++ b/src/shader_recompiler/exception.h
@@ -0,0 +1,42 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <stdexcept>
+#include <utility>
+
+#include <fmt/format.h>
+
+namespace Shader {
+
+class LogicError : public std::logic_error {
+public:
+    template <typename... Args>
+    LogicError(const char* message, Args&&... args)
+        : std::logic_error{fmt::format(message, std::forward<Args>(args)...)} {}
+};
+
+class RuntimeError : public std::runtime_error {
+public:
+    template <typename... Args>
+    RuntimeError(const char* message, Args&&... args)
+        : std::runtime_error{fmt::format(message, std::forward<Args>(args)...)} {}
+};
+
+class NotImplementedException : public std::logic_error {
+public:
+    template <typename... Args>
+    NotImplementedException(const char* message, Args&&... args)
+        : std::logic_error{fmt::format(message, std::forward<Args>(args)...)} {}
+};
+
+class InvalidArgument : public std::invalid_argument {
+public:
+    template <typename... Args>
+    InvalidArgument(const char* message, Args&&... args)
+        : std::invalid_argument{fmt::format(message, std::forward<Args>(args)...)} {}
+};
+
+} // namespace Shader
diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp
new file mode 100644
index 000000000..b34bf462b
--- /dev/null
+++ b/src/shader_recompiler/file_environment.cpp
@@ -0,0 +1,42 @@
+#include <cstdio>
+
+#include "exception.h"
+#include "file_environment.h"
+
+namespace Shader {
+
+FileEnvironment::FileEnvironment(const char* path) {
+    std::FILE* const file{std::fopen(path, "rb")};
+    if (!file) {
+        throw RuntimeError("Failed to open file='{}'", path);
+    }
+    std::fseek(file, 0, SEEK_END);
+    const long size{std::ftell(file)};
+    std::rewind(file);
+    if (size % 8 != 0) {
+        std::fclose(file);
+        throw RuntimeError("File size={} is not aligned to 8", size);
+    }
+    // TODO: Use a unique_ptr to avoid zero-initializing this
+    const size_t num_inst{static_cast<size_t>(size) / 8};
+    data.resize(num_inst);
+    if (std::fread(data.data(), 8, num_inst, file) != num_inst) {
+        std::fclose(file);
+        throw RuntimeError("Failed to read instructions={} from file='{}'", num_inst, path);
+    }
+    std::fclose(file);
+}
+
+FileEnvironment::~FileEnvironment() = default;
+
+u64 FileEnvironment::ReadInstruction(u32 offset) const {
+    if (offset % 8 != 0) {
+        throw InvalidArgument("offset={} is not aligned to 8", offset);
+    }
+    if (offset / 8 >= static_cast<u32>(data.size())) {
+        throw InvalidArgument("offset={} is out of bounds", offset);
+    }
+    return data[offset / 8];
+}
+
+} // namespace Shader
diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h
new file mode 100644
index 000000000..c294bc6fa
--- /dev/null
+++ b/src/shader_recompiler/file_environment.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+#include "environment.h"
+
+namespace Shader {
+
+class FileEnvironment final : public Environment {
+public:
+    explicit FileEnvironment(const char* path);
+    ~FileEnvironment() override;
+
+    u64 ReadInstruction(u32 offset) const override;
+
+private:
+    std::vector<u64> data;
+};
+
+} // namespace Shader
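
The Environment interface above is deliberately tiny: one aligned 64-bit instruction fetch. FileEnvironment is the only implementation in this patch, but any environment only has to honor the same alignment and bounds rules. A minimal in-memory sketch against that interface (BufferEnvironment is a hypothetical name, not part of the patch; it assumes the headers above are on the include path):

    // In-memory Environment for feeding the recompiler without file I/O.
    // Mirrors FileEnvironment's alignment and bounds checks.
    #include <utility>
    #include <vector>

    #include "shader_recompiler/environment.h"
    #include "shader_recompiler/exception.h"

    namespace Shader {

    class BufferEnvironment final : public Environment {
    public:
        explicit BufferEnvironment(std::vector<u64> code_) : code{std::move(code_)} {}

        u64 ReadInstruction(u32 address) const override {
            if (address % 8 != 0) {
                throw InvalidArgument("address={} is not aligned to 8", address);
            }
            if (address / 8 >= code.size()) {
                throw InvalidArgument("address={} is out of bounds", address);
            }
            return code[address / 8];
        }

    private:
        std::vector<u64> code;
    };

    } // namespace Shader
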
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..2fb7d576f
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,447 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+
+namespace Shader::IR {
+
+bool IsGeneric(Attribute attribute) noexcept {
+    return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X;
+}
+
+int GenericAttributeIndex(Attribute attribute) {
+    if (!IsGeneric(attribute)) {
+        throw InvalidArgument("Attribute is not generic {}", attribute);
+    }
+    return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4;
+}
+
+std::string NameOf(Attribute attribute) {
+    switch (attribute) {
+    case Attribute::PrimitiveId:
+        return "PrimitiveId";
+    case Attribute::Layer:
+        return "Layer";
+    case Attribute::ViewportIndex:
+        return "ViewportIndex";
+    case Attribute::PointSize:
+        return "PointSize";
+    case Attribute::PositionX:
+        return "Position.X";
+    case Attribute::PositionY:
+        return "Position.Y";
+    case Attribute::PositionZ:
+        return "Position.Z";
+    case Attribute::PositionW:
+        return "Position.W";
+    case Attribute::Generic0X:
+        return "Generic[0].X";
+    case Attribute::Generic0Y:
+        return "Generic[0].Y";
+    case Attribute::Generic0Z:
+        return "Generic[0].Z";
+    case Attribute::Generic0W:
+        return "Generic[0].W";
+    case Attribute::Generic1X:
+        return "Generic[1].X";
+    case Attribute::Generic1Y:
+        return "Generic[1].Y";
+    case Attribute::Generic1Z:
+        return "Generic[1].Z";
+    case Attribute::Generic1W:
+        return "Generic[1].W";
+    case Attribute::Generic2X:
+        return "Generic[2].X";
+    case Attribute::Generic2Y:
+        return "Generic[2].Y";
+    case Attribute::Generic2Z:
+        return "Generic[2].Z";
+    case Attribute::Generic2W:
+        return "Generic[2].W";
+    case Attribute::Generic3X:
+        return "Generic[3].X";
+    case Attribute::Generic3Y:
+        return "Generic[3].Y";
+    case Attribute::Generic3Z:
+        return "Generic[3].Z";
+    case Attribute::Generic3W:
+        return "Generic[3].W";
+    case Attribute::Generic4X:
+        return "Generic[4].X";
+    case Attribute::Generic4Y:
+        return "Generic[4].Y";
+    case Attribute::Generic4Z:
+        return "Generic[4].Z";
+    case Attribute::Generic4W:
+        return "Generic[4].W";
+    case Attribute::Generic5X:
+        return "Generic[5].X";
+    case Attribute::Generic5Y:
+        return "Generic[5].Y";
+    case Attribute::Generic5Z:
+        return "Generic[5].Z";
+    case Attribute::Generic5W:
+        return "Generic[5].W";
+    case Attribute::Generic6X:
+        return "Generic[6].X";
+    case Attribute::Generic6Y:
+        return "Generic[6].Y";
+    case Attribute::Generic6Z:
+        return "Generic[6].Z";
+    case Attribute::Generic6W:
+        return "Generic[6].W";
+    case Attribute::Generic7X:
+        return "Generic[7].X";
+    case Attribute::Generic7Y:
+        return "Generic[7].Y";
+    case Attribute::Generic7Z:
+        return "Generic[7].Z";
+    case Attribute::Generic7W:
+        return "Generic[7].W";
+    case Attribute::Generic8X:
+        return "Generic[8].X";
+    case Attribute::Generic8Y:
+        return "Generic[8].Y";
+    case Attribute::Generic8Z:
+        return "Generic[8].Z";
+    case Attribute::Generic8W:
+        return "Generic[8].W";
+    case Attribute::Generic9X:
+        return "Generic[9].X";
+    case Attribute::Generic9Y:
+        return "Generic[9].Y";
+    case Attribute::Generic9Z:
+        return "Generic[9].Z";
+    case Attribute::Generic9W:
+        return "Generic[9].W";
+    case Attribute::Generic10X:
+        return "Generic[10].X";
+    case Attribute::Generic10Y:
+        return "Generic[10].Y";
+    case Attribute::Generic10Z:
+        return "Generic[10].Z";
+    case Attribute::Generic10W:
+        return "Generic[10].W";
+    case Attribute::Generic11X:
+        return "Generic[11].X";
+    case Attribute::Generic11Y:
+        return "Generic[11].Y";
+    case Attribute::Generic11Z:
+        return "Generic[11].Z";
+    case
Attribute::Generic11W: + return "Generic[11].W"; + case Attribute::Generic12X: + return "Generic[12].X"; + case Attribute::Generic12Y: + return "Generic[12].Y"; + case Attribute::Generic12Z: + return "Generic[12].Z"; + case Attribute::Generic12W: + return "Generic[12].W"; + case Attribute::Generic13X: + return "Generic[13].X"; + case Attribute::Generic13Y: + return "Generic[13].Y"; + case Attribute::Generic13Z: + return "Generic[13].Z"; + case Attribute::Generic13W: + return "Generic[13].W"; + case Attribute::Generic14X: + return "Generic[14].X"; + case Attribute::Generic14Y: + return "Generic[14].Y"; + case Attribute::Generic14Z: + return "Generic[14].Z"; + case Attribute::Generic14W: + return "Generic[14].W"; + case Attribute::Generic15X: + return "Generic[15].X"; + case Attribute::Generic15Y: + return "Generic[15].Y"; + case Attribute::Generic15Z: + return "Generic[15].Z"; + case Attribute::Generic15W: + return "Generic[15].W"; + case Attribute::Generic16X: + return "Generic[16].X"; + case Attribute::Generic16Y: + return "Generic[16].Y"; + case Attribute::Generic16Z: + return "Generic[16].Z"; + case Attribute::Generic16W: + return "Generic[16].W"; + case Attribute::Generic17X: + return "Generic[17].X"; + case Attribute::Generic17Y: + return "Generic[17].Y"; + case Attribute::Generic17Z: + return "Generic[17].Z"; + case Attribute::Generic17W: + return "Generic[17].W"; + case Attribute::Generic18X: + return "Generic[18].X"; + case Attribute::Generic18Y: + return "Generic[18].Y"; + case Attribute::Generic18Z: + return "Generic[18].Z"; + case Attribute::Generic18W: + return "Generic[18].W"; + case Attribute::Generic19X: + return "Generic[19].X"; + case Attribute::Generic19Y: + return "Generic[19].Y"; + case Attribute::Generic19Z: + return "Generic[19].Z"; + case Attribute::Generic19W: + return "Generic[19].W"; + case Attribute::Generic20X: + return "Generic[20].X"; + case Attribute::Generic20Y: + return "Generic[20].Y"; + case Attribute::Generic20Z: + return "Generic[20].Z"; + case Attribute::Generic20W: + return "Generic[20].W"; + case Attribute::Generic21X: + return "Generic[21].X"; + case Attribute::Generic21Y: + return "Generic[21].Y"; + case Attribute::Generic21Z: + return "Generic[21].Z"; + case Attribute::Generic21W: + return "Generic[21].W"; + case Attribute::Generic22X: + return "Generic[22].X"; + case Attribute::Generic22Y: + return "Generic[22].Y"; + case Attribute::Generic22Z: + return "Generic[22].Z"; + case Attribute::Generic22W: + return "Generic[22].W"; + case Attribute::Generic23X: + return "Generic[23].X"; + case Attribute::Generic23Y: + return "Generic[23].Y"; + case Attribute::Generic23Z: + return "Generic[23].Z"; + case Attribute::Generic23W: + return "Generic[23].W"; + case Attribute::Generic24X: + return "Generic[24].X"; + case Attribute::Generic24Y: + return "Generic[24].Y"; + case Attribute::Generic24Z: + return "Generic[24].Z"; + case Attribute::Generic24W: + return "Generic[24].W"; + case Attribute::Generic25X: + return "Generic[25].X"; + case Attribute::Generic25Y: + return "Generic[25].Y"; + case Attribute::Generic25Z: + return "Generic[25].Z"; + case Attribute::Generic25W: + return "Generic[25].W"; + case Attribute::Generic26X: + return "Generic[26].X"; + case Attribute::Generic26Y: + return "Generic[26].Y"; + case Attribute::Generic26Z: + return "Generic[26].Z"; + case Attribute::Generic26W: + return "Generic[26].W"; + case Attribute::Generic27X: + return "Generic[27].X"; + case Attribute::Generic27Y: + return "Generic[27].Y"; + case Attribute::Generic27Z: + 
return "Generic[27].Z"; + case Attribute::Generic27W: + return "Generic[27].W"; + case Attribute::Generic28X: + return "Generic[28].X"; + case Attribute::Generic28Y: + return "Generic[28].Y"; + case Attribute::Generic28Z: + return "Generic[28].Z"; + case Attribute::Generic28W: + return "Generic[28].W"; + case Attribute::Generic29X: + return "Generic[29].X"; + case Attribute::Generic29Y: + return "Generic[29].Y"; + case Attribute::Generic29Z: + return "Generic[29].Z"; + case Attribute::Generic29W: + return "Generic[29].W"; + case Attribute::Generic30X: + return "Generic[30].X"; + case Attribute::Generic30Y: + return "Generic[30].Y"; + case Attribute::Generic30Z: + return "Generic[30].Z"; + case Attribute::Generic30W: + return "Generic[30].W"; + case Attribute::Generic31X: + return "Generic[31].X"; + case Attribute::Generic31Y: + return "Generic[31].Y"; + case Attribute::Generic31Z: + return "Generic[31].Z"; + case Attribute::Generic31W: + return "Generic[31].W"; + case Attribute::ColorFrontDiffuseR: + return "ColorFrontDiffuse.R"; + case Attribute::ColorFrontDiffuseG: + return "ColorFrontDiffuse.G"; + case Attribute::ColorFrontDiffuseB: + return "ColorFrontDiffuse.B"; + case Attribute::ColorFrontDiffuseA: + return "ColorFrontDiffuse.A"; + case Attribute::ColorFrontSpecularR: + return "ColorFrontSpecular.R"; + case Attribute::ColorFrontSpecularG: + return "ColorFrontSpecular.G"; + case Attribute::ColorFrontSpecularB: + return "ColorFrontSpecular.B"; + case Attribute::ColorFrontSpecularA: + return "ColorFrontSpecular.A"; + case Attribute::ColorBackDiffuseR: + return "ColorBackDiffuse.R"; + case Attribute::ColorBackDiffuseG: + return "ColorBackDiffuse.G"; + case Attribute::ColorBackDiffuseB: + return "ColorBackDiffuse.B"; + case Attribute::ColorBackDiffuseA: + return "ColorBackDiffuse.A"; + case Attribute::ColorBackSpecularR: + return "ColorBackSpecular.R"; + case Attribute::ColorBackSpecularG: + return "ColorBackSpecular.G"; + case Attribute::ColorBackSpecularB: + return "ColorBackSpecular.B"; + case Attribute::ColorBackSpecularA: + return "ColorBackSpecular.A"; + case Attribute::ClipDistance0: + return "ClipDistance[0]"; + case Attribute::ClipDistance1: + return "ClipDistance[1]"; + case Attribute::ClipDistance2: + return "ClipDistance[2]"; + case Attribute::ClipDistance3: + return "ClipDistance[3]"; + case Attribute::ClipDistance4: + return "ClipDistance[4]"; + case Attribute::ClipDistance5: + return "ClipDistance[5]"; + case Attribute::ClipDistance6: + return "ClipDistance[6]"; + case Attribute::ClipDistance7: + return "ClipDistance[7]"; + case Attribute::PointSpriteS: + return "PointSprite.S"; + case Attribute::PointSpriteT: + return "PointSprite.T"; + case Attribute::FogCoordinate: + return "FogCoordinate"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPoint.U"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPoint.V"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::VertexId: + return "VertexId"; + case Attribute::FixedFncTexture0S: + return "FixedFncTexture[0].S"; + case Attribute::FixedFncTexture0T: + return "FixedFncTexture[0].T"; + case Attribute::FixedFncTexture0R: + return "FixedFncTexture[0].R"; + case Attribute::FixedFncTexture0Q: + return "FixedFncTexture[0].Q"; + case Attribute::FixedFncTexture1S: + return "FixedFncTexture[1].S"; + case Attribute::FixedFncTexture1T: + return "FixedFncTexture[1].T"; + case Attribute::FixedFncTexture1R: + return "FixedFncTexture[1].R"; + case 
Attribute::FixedFncTexture1Q:
+        return "FixedFncTexture[1].Q";
+    case Attribute::FixedFncTexture2S:
+        return "FixedFncTexture[2].S";
+    case Attribute::FixedFncTexture2T:
+        return "FixedFncTexture[2].T";
+    case Attribute::FixedFncTexture2R:
+        return "FixedFncTexture[2].R";
+    case Attribute::FixedFncTexture2Q:
+        return "FixedFncTexture[2].Q";
+    case Attribute::FixedFncTexture3S:
+        return "FixedFncTexture[3].S";
+    case Attribute::FixedFncTexture3T:
+        return "FixedFncTexture[3].T";
+    case Attribute::FixedFncTexture3R:
+        return "FixedFncTexture[3].R";
+    case Attribute::FixedFncTexture3Q:
+        return "FixedFncTexture[3].Q";
+    case Attribute::FixedFncTexture4S:
+        return "FixedFncTexture[4].S";
+    case Attribute::FixedFncTexture4T:
+        return "FixedFncTexture[4].T";
+    case Attribute::FixedFncTexture4R:
+        return "FixedFncTexture[4].R";
+    case Attribute::FixedFncTexture4Q:
+        return "FixedFncTexture[4].Q";
+    case Attribute::FixedFncTexture5S:
+        return "FixedFncTexture[5].S";
+    case Attribute::FixedFncTexture5T:
+        return "FixedFncTexture[5].T";
+    case Attribute::FixedFncTexture5R:
+        return "FixedFncTexture[5].R";
+    case Attribute::FixedFncTexture5Q:
+        return "FixedFncTexture[5].Q";
+    case Attribute::FixedFncTexture6S:
+        return "FixedFncTexture[6].S";
+    case Attribute::FixedFncTexture6T:
+        return "FixedFncTexture[6].T";
+    case Attribute::FixedFncTexture6R:
+        return "FixedFncTexture[6].R";
+    case Attribute::FixedFncTexture6Q:
+        return "FixedFncTexture[6].Q";
+    case Attribute::FixedFncTexture7S:
+        return "FixedFncTexture[7].S";
+    case Attribute::FixedFncTexture7T:
+        return "FixedFncTexture[7].T";
+    case Attribute::FixedFncTexture7R:
+        return "FixedFncTexture[7].R";
+    case Attribute::FixedFncTexture7Q:
+        return "FixedFncTexture[7].Q";
+    case Attribute::FixedFncTexture8S:
+        return "FixedFncTexture[8].S";
+    case Attribute::FixedFncTexture8T:
+        return "FixedFncTexture[8].T";
+    case Attribute::FixedFncTexture8R:
+        return "FixedFncTexture[8].R";
+    case Attribute::FixedFncTexture8Q:
+        return "FixedFncTexture[8].Q";
+    case Attribute::FixedFncTexture9S:
+        return "FixedFncTexture[9].S";
+    case Attribute::FixedFncTexture9T:
+        return "FixedFncTexture[9].T";
+    case Attribute::FixedFncTexture9R:
+        return "FixedFncTexture[9].R";
+    case Attribute::FixedFncTexture9Q:
+        return "FixedFncTexture[9].Q";
+    case Attribute::ViewportMask:
+        return "ViewportMask";
+    case Attribute::FrontFace:
+        return "FrontFace";
+    }
+    return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
+}
+
+} // namespace Shader::IR
\ No newline at end of file
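
The generic attributes handled above sit four components to a slot starting at Generic0X (value 32), which is why GenericAttributeIndex can recover the slot with plain integer division. The same arithmetic, reduced to a standalone sketch (the constants mirror the enum in the header that follows):

    // Each Generic[N] attribute occupies four consecutive values: X, Y, Z, W.
    #include <cstdio>

    int main() {
        constexpr int generic0_x = 32; // Attribute::Generic0X
        for (int raw = 32; raw < 40; ++raw) { // Generic0X through Generic1W
            const int index = (raw - generic0_x) / 4; // GenericAttributeIndex's formula
            const char component = "XYZW"[(raw - generic0_x) % 4];
            std::printf("attribute %d -> Generic[%d].%c\n", raw, index, component);
        }
        return 0;
    }
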
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..bb2cad6af
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,242 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+
+namespace Shader::IR {
+
+enum class Attribute : u64 {
+    PrimitiveId = 24,
+    Layer = 25,
+    ViewportIndex = 26,
+    PointSize = 27,
+    PositionX = 28,
+    PositionY = 29,
+    PositionZ = 30,
+    PositionW = 31,
+    Generic0X = 32,
+    Generic0Y = 33,
+    Generic0Z = 34,
+    Generic0W = 35,
+    Generic1X = 36,
+    Generic1Y = 37,
+    Generic1Z = 38,
+    Generic1W = 39,
+    Generic2X = 40,
+    Generic2Y = 41,
+    Generic2Z = 42,
+    Generic2W = 43,
+    Generic3X = 44,
+    Generic3Y = 45,
+    Generic3Z = 46,
+    Generic3W = 47,
+    Generic4X = 48,
+    Generic4Y = 49,
+    Generic4Z = 50,
+    Generic4W = 51,
+    Generic5X = 52,
+    Generic5Y = 53,
+    Generic5Z = 54,
+    Generic5W = 55,
+    Generic6X = 56,
+    Generic6Y = 57,
+    Generic6Z = 58,
+    Generic6W = 59,
+    Generic7X = 60,
+    Generic7Y = 61,
+    Generic7Z = 62,
+    Generic7W = 63,
+    Generic8X = 64,
+    Generic8Y = 65,
+    Generic8Z = 66,
+    Generic8W = 67,
+    Generic9X = 68,
+    Generic9Y = 69,
+    Generic9Z = 70,
+    Generic9W = 71,
+    Generic10X = 72,
+    Generic10Y = 73,
+    Generic10Z = 74,
+    Generic10W = 75,
+    Generic11X = 76,
+    Generic11Y = 77,
+    Generic11Z = 78,
+    Generic11W = 79,
+    Generic12X = 80,
+    Generic12Y = 81,
+    Generic12Z = 82,
+    Generic12W = 83,
+    Generic13X = 84,
+    Generic13Y = 85,
+    Generic13Z = 86,
+    Generic13W = 87,
+    Generic14X = 88,
+    Generic14Y = 89,
+    Generic14Z = 90,
+    Generic14W = 91,
+    Generic15X = 92,
+    Generic15Y = 93,
+    Generic15Z = 94,
+    Generic15W = 95,
+    Generic16X = 96,
+    Generic16Y = 97,
+    Generic16Z = 98,
+    Generic16W = 99,
+    Generic17X = 100,
+    Generic17Y = 101,
+    Generic17Z = 102,
+    Generic17W = 103,
+    Generic18X = 104,
+    Generic18Y = 105,
+    Generic18Z = 106,
+    Generic18W = 107,
+    Generic19X = 108,
+    Generic19Y = 109,
+    Generic19Z = 110,
+    Generic19W = 111,
+    Generic20X = 112,
+    Generic20Y = 113,
+    Generic20Z = 114,
+    Generic20W = 115,
+    Generic21X = 116,
+    Generic21Y = 117,
+    Generic21Z = 118,
+    Generic21W = 119,
+    Generic22X = 120,
+    Generic22Y = 121,
+    Generic22Z = 122,
+    Generic22W = 123,
+    Generic23X = 124,
+    Generic23Y = 125,
+    Generic23Z = 126,
+    Generic23W = 127,
+    Generic24X = 128,
+    Generic24Y = 129,
+    Generic24Z = 130,
+    Generic24W = 131,
+    Generic25X = 132,
+    Generic25Y = 133,
+    Generic25Z = 134,
+    Generic25W = 135,
+    Generic26X = 136,
+    Generic26Y = 137,
+    Generic26Z = 138,
+    Generic26W = 139,
+    Generic27X = 140,
+    Generic27Y = 141,
+    Generic27Z = 142,
+    Generic27W = 143,
+    Generic28X = 144,
+    Generic28Y = 145,
+    Generic28Z = 146,
+    Generic28W = 147,
+    Generic29X = 148,
+    Generic29Y = 149,
+    Generic29Z = 150,
+    Generic29W = 151,
+    Generic30X = 152,
+    Generic30Y = 153,
+    Generic30Z = 154,
+    Generic30W = 155,
+    Generic31X = 156,
+    Generic31Y = 157,
+    Generic31Z = 158,
+    Generic31W = 159,
+    ColorFrontDiffuseR = 160,
+    ColorFrontDiffuseG = 161,
+    ColorFrontDiffuseB = 162,
+    ColorFrontDiffuseA = 163,
+    ColorFrontSpecularR = 164,
+    ColorFrontSpecularG = 165,
+    ColorFrontSpecularB = 166,
+    ColorFrontSpecularA = 167,
+    ColorBackDiffuseR = 168,
+    ColorBackDiffuseG = 169,
+    ColorBackDiffuseB = 170,
+    ColorBackDiffuseA = 171,
+    ColorBackSpecularR = 172,
+    ColorBackSpecularG = 173,
+    ColorBackSpecularB = 174,
+    ColorBackSpecularA = 175,
+    ClipDistance0 = 176,
+    ClipDistance1 = 177,
+    ClipDistance2 = 178,
+    ClipDistance3 = 179,
+    ClipDistance4 = 180,
+    ClipDistance5 = 181,
+    ClipDistance6 = 182,
+    ClipDistance7 = 183,
+    PointSpriteS = 184,
+    PointSpriteT = 185,
+    FogCoordinate = 186,
+    TessellationEvaluationPointU = 188,
+    TessellationEvaluationPointV = 189,
+    InstanceId = 190,
+    VertexId = 191,
+    FixedFncTexture0S = 192,
+    FixedFncTexture0T = 193,
+    FixedFncTexture0R = 194,
+    FixedFncTexture0Q = 195,
+    FixedFncTexture1S = 196,
+    FixedFncTexture1T = 197,
+    FixedFncTexture1R = 198,
+    FixedFncTexture1Q = 199,
+    FixedFncTexture2S = 200,
+    FixedFncTexture2T = 201,
+    FixedFncTexture2R = 202,
+    FixedFncTexture2Q = 203,
+    FixedFncTexture3S = 204,
+    FixedFncTexture3T = 205,
+    FixedFncTexture3R = 206,
+    FixedFncTexture3Q = 207,
+    FixedFncTexture4S = 208,
+    FixedFncTexture4T = 209,
+    FixedFncTexture4R = 210,
+    FixedFncTexture4Q = 211,
+    FixedFncTexture5S = 212,
+    FixedFncTexture5T = 213,
+    FixedFncTexture5R = 214,
+    FixedFncTexture5Q = 215,
+    FixedFncTexture6S = 216,
+    FixedFncTexture6T = 217,
+    FixedFncTexture6R = 218,
+    FixedFncTexture6Q = 219,
+    FixedFncTexture7S = 220,
+    FixedFncTexture7T = 221,
+    FixedFncTexture7R = 222,
+    FixedFncTexture7Q = 223,
+    FixedFncTexture8S = 224,
+    FixedFncTexture8T = 225,
+    FixedFncTexture8R = 226,
+    FixedFncTexture8Q = 227,
+    FixedFncTexture9S = 228,
+    FixedFncTexture9T = 229,
+    FixedFncTexture9R = 230,
+    FixedFncTexture9Q = 231,
+    ViewportMask = 232,
+    FrontFace = 255,
+};
+
+[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;
+
+[[nodiscard]] int GenericAttributeIndex(Attribute attribute);
+
+[[nodiscard]] std::string NameOf(Attribute attribute);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Attribute> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
+    }
+};
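
The fmt::formatter specialization at the bottom of attribute.h is what lets IR code hand Attribute values straight to fmt, which DumpBlock relies on below. A minimal usage sketch (assuming fmt is available and the header is on the include path):

    #include <fmt/format.h>

    #include "shader_recompiler/frontend/ir/attribute.h"

    int main() {
        using Shader::IR::Attribute;
        // Both arguments route through fmt::formatter<Shader::IR::Attribute>,
        // which forwards to Shader::IR::NameOf.
        fmt::print("{} {}\n", Attribute::PositionX, Attribute::Generic0W);
        // Prints: Position.X Generic[0].W
        return 0;
    }
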
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..0406726ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,142 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <initializer_list>
+#include <map>
+#include <memory>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {}
+
+Block::~Block() = default;
+
+void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
+    PrependNewInst(end(), op, args);
+}
+
+Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
+                                      std::initializer_list<Value> args) {
+    Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)};
+    const auto result_it{instructions.insert(insertion_point, *inst)};
+
+    if (inst->NumArgs() != args.size()) {
+        throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
+    }
+    std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
+        inst->SetArg(index, arg);
+        ++index;
+    });
+    return result_it;
+}
+
+u32 Block::LocationBegin() const noexcept {
+    return location_begin;
+}
+
+u32 Block::LocationEnd() const noexcept {
+    return location_end;
+}
+
+Block::InstructionList& Block::Instructions() noexcept {
+    return instructions;
+}
+
+const Block::InstructionList& Block::Instructions() const noexcept {
+    return instructions;
+}
+
+static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
+                              const std::map<const Inst*, size_t>& inst_to_index,
+                              const Value& arg) {
+    if (arg.IsEmpty()) {
+        return "<null>";
+    }
+    if (arg.IsLabel()) {
+        if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) {
+            return fmt::format("{{Block ${}}}", it->second);
+        }
+        return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label()));
+    }
+    if (!arg.IsImmediate()) {
+        if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) {
+            return fmt::format("%{}", it->second);
+        }
+        return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst()));
+    }
+    switch (arg.Type()) {
+    case Type::U1:
+        return fmt::format("#{}", arg.U1() ? '1' : '0');
+    case Type::U8:
+        return fmt::format("#{}", arg.U8());
+    case Type::U16:
+        return fmt::format("#{}", arg.U16());
+    case Type::U32:
+        return fmt::format("#{}", arg.U32());
+    case Type::U64:
+        return fmt::format("#{}", arg.U64());
+    case Type::Reg:
+        return fmt::format("{}", arg.Reg());
+    case Type::Pred:
+        return fmt::format("{}", arg.Pred());
+    case Type::Attribute:
+        return fmt::format("{}", arg.Attribute());
+    default:
+        return "<unknown immediate type>";
+    }
+}
+
+std::string DumpBlock(const Block& block) {
+    size_t inst_index{0};
+    std::map<const Inst*, size_t> inst_to_index;
+    return DumpBlock(block, {}, inst_to_index, inst_index);
+}
+
+std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
+                      std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
+    std::string ret{"Block"};
+    if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
+        ret += fmt::format(" ${}", it->second);
+    }
+    ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
+
+    for (const Inst& inst : block) {
+        const Opcode op{inst.Opcode()};
+        ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
+        if (TypeOf(op) != Type::Void) {
+            ret += fmt::format("%{:<5} = {}", inst_index, op);
+        } else {
+            ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
+        }
+        const size_t arg_count{NumArgsOf(op)};
+        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+            const Value arg{inst.Arg(arg_index)};
+            ret += arg_index != 0 ? ", " : " ";
+            ret += ArgToIndex(block_to_index, inst_to_index, arg);
+
+            const Type actual_type{arg.Type()};
+            const Type expected_type{ArgTypeOf(op, arg_index)};
+            if (!AreTypesCompatible(actual_type, expected_type)) {
+                ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
+            }
+        }
+        if (TypeOf(op) != Type::Void) {
+            ret += fmt::format(" (uses: {})\n", inst.UseCount());
+        } else {
+            ret += '\n';
+        }
+
+        inst_to_index.emplace(&inst, inst_index);
+        ++inst_index;
+    }
+    return ret;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..3ed2eb957
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,134 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <initializer_list>
+#include <map>
+
+#include <boost/intrusive/list.hpp>
+#include <boost/pool/pool_alloc.hpp>
+
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+
+namespace Shader::IR {
+
+class Block {
+public:
+    using InstructionList = boost::intrusive::list<Inst>;
+    using size_type = InstructionList::size_type;
+    using iterator = InstructionList::iterator;
+    using const_iterator = InstructionList::const_iterator;
+    using reverse_iterator = InstructionList::reverse_iterator;
+    using const_reverse_iterator = InstructionList::const_reverse_iterator;
+
+    explicit Block(u32 begin, u32 end);
+    ~Block();
+
+    Block(const Block&) = delete;
+    Block& operator=(const Block&) = delete;
+
+    Block(Block&&) = default;
+    Block& operator=(Block&&) = default;
+
+    /// Appends a new instruction to the end of this basic block.
+    void AppendNewInst(Opcode op, std::initializer_list<Value> args);
+
+    /// Prepends a new instruction to this basic block before the insertion point.
+    iterator PrependNewInst(iterator insertion_point, Opcode op,
+                            std::initializer_list<Value> args);
+
+    /// Gets the starting location of this basic block.
+    [[nodiscard]] u32 LocationBegin() const noexcept;
+    /// Gets the end location for this basic block.
+    [[nodiscard]] u32 LocationEnd() const noexcept;
+
+    /// Gets a mutable reference to the instruction list for this basic block.
+    InstructionList& Instructions() noexcept;
+    /// Gets an immutable reference to the instruction list for this basic block.
+    const InstructionList& Instructions() const noexcept;
+
+    [[nodiscard]] bool empty() const {
+        return instructions.empty();
+    }
+    [[nodiscard]] size_type size() const {
+        return instructions.size();
+    }
+
+    [[nodiscard]] Inst& front() {
+        return instructions.front();
+    }
+    [[nodiscard]] const Inst& front() const {
+        return instructions.front();
+    }
+
+    [[nodiscard]] Inst& back() {
+        return instructions.back();
+    }
+    [[nodiscard]] const Inst& back() const {
+        return instructions.back();
+    }
+
+    [[nodiscard]] iterator begin() {
+        return instructions.begin();
+    }
+    [[nodiscard]] const_iterator begin() const {
+        return instructions.begin();
+    }
+    [[nodiscard]] iterator end() {
+        return instructions.end();
+    }
+    [[nodiscard]] const_iterator end() const {
+        return instructions.end();
+    }
+
+    [[nodiscard]] reverse_iterator rbegin() {
+        return instructions.rbegin();
+    }
+    [[nodiscard]] const_reverse_iterator rbegin() const {
+        return instructions.rbegin();
+    }
+    [[nodiscard]] reverse_iterator rend() {
+        return instructions.rend();
+    }
+    [[nodiscard]] const_reverse_iterator rend() const {
+        return instructions.rend();
+    }
+
+    [[nodiscard]] const_iterator cbegin() const {
+        return instructions.cbegin();
+    }
+    [[nodiscard]] const_iterator cend() const {
+        return instructions.cend();
+    }
+
+    [[nodiscard]] const_reverse_iterator crbegin() const {
+        return instructions.crbegin();
+    }
+    [[nodiscard]] const_reverse_iterator crend() const {
+        return instructions.crend();
+    }
+
+private:
+    /// Starting location of this block
+    u32 location_begin;
+    /// End location of this block
+    u32 location_end;
+
+    /// List of instructions in this block.
+    InstructionList instructions;
+
+    /// Memory pool for instruction list
+    boost::fast_pool_allocator<Inst, boost::default_user_allocator_new_delete,
+                               boost::details::pool::default_mutex, 8192>
+        instruction_alloc_pool;
+};
+
+[[nodiscard]] std::string DumpBlock(const Block& block);
+
+[[nodiscard]] std::string DumpBlock(const Block& block,
+                                    const std::map<const Block*, size_t>& block_to_index,
+                                    std::map<const Inst*, size_t>& inst_to_index,
+                                    size_t& inst_index);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..edff35dc7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/condition.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Condition condition) {
+    std::string ret;
+    if (condition.FlowTest() != FlowTest::T) {
+        ret = fmt::to_string(condition.FlowTest());
+    }
+    const auto [pred, negated]{condition.Pred()};
+    if (pred != Pred::PT || negated) {
+        if (!ret.empty()) {
+            ret += '&';
+        }
+        if (negated) {
+            ret += '!';
+        }
+        ret += fmt::to_string(pred);
+    }
+    return ret;
+}
+
+} // namespace Shader::IR
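
Condition packs a flow test, a predicate index, and a negation bit into four trivially comparable bytes, and NameOf above prints only the parts that differ from the always-true default. A small sketch of the expected rendering (the predicate values are illustrative; the strings follow from the NameOf logic above):

    #include <fmt/format.h>

    #include "shader_recompiler/frontend/ir/condition.h"

    int main() {
        using namespace Shader::IR;
        fmt::print("{}\n", Condition{Pred::P0});               // "P0"
        fmt::print("{}\n", Condition{Pred::P0, true});         // "!P0"
        fmt::print("{}\n", Condition{FlowTest::NE, Pred::P0}); // "NE&P0"
        fmt::print("{}\n", Condition{true});                   // always true: empty name
        return 0;
    }
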
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..52737025c
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+
+namespace Shader::IR {
+
+class Condition {
+public:
+    Condition() noexcept = default;
+
+    explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
+        : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
+          pred_negated{pred_negated_ ? u8{1} : u8{0}} {}
+
+    explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
+        : Condition(FlowTest::T, pred_, pred_negated_) {}
+
+    Condition(bool value) : Condition(Pred::PT, !value) {}
+
+    auto operator<=>(const Condition&) const noexcept = default;
+
+    [[nodiscard]] IR::FlowTest FlowTest() const noexcept {
+        return static_cast<IR::FlowTest>(flow_test);
+    }
+
+    [[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept {
+        return {static_cast<IR::Pred>(pred), pred_negated != 0};
+    }
+
+private:
+    u16 flow_test;
+    u8 pred;
+    u8 pred_negated;
+};
+
+std::string NameOf(Condition condition);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Condition> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/flow_test.h"
+
+namespace Shader::IR {
+
+std::string NameOf(FlowTest flow_test) {
+    switch (flow_test) {
+    case FlowTest::F:
+        return "F";
+    case FlowTest::LT:
+        return "LT";
+    case FlowTest::EQ:
+        return "EQ";
+    case FlowTest::LE:
+        return "LE";
+    case FlowTest::GT:
+        return "GT";
+    case FlowTest::NE:
+        return "NE";
+    case FlowTest::GE:
+        return "GE";
+    case FlowTest::NUM:
+        return "NUM";
+    case FlowTest::NaN:
+        return "NAN";
+    case FlowTest::LTU:
+        return "LTU";
+    case FlowTest::EQU:
+        return "EQU";
+    case FlowTest::LEU:
+        return "LEU";
+    case FlowTest::GTU:
+        return "GTU";
+    case FlowTest::NEU:
+        return "NEU";
+    case FlowTest::GEU:
+        return "GEU";
+    case FlowTest::T:
+        return "T";
+    case FlowTest::OFF:
+        return "OFF";
+    case FlowTest::LO:
+        return "LO";
+    case FlowTest::SFF:
+        return "SFF";
+    case FlowTest::LS:
+        return "LS";
+    case FlowTest::HI:
+        return "HI";
+    case FlowTest::SFT:
+        return "SFT";
+    case FlowTest::HS:
+        return "HS";
+    case FlowTest::OFT:
+        return "OFT";
+    case FlowTest::CSM_TA:
+        return "CSM_TA";
+    case FlowTest::CSM_TR:
+        return "CSM_TR";
+    case FlowTest::CSM_MX:
+        return "CSM_MX";
+    case FlowTest::FCSM_TA:
+        return "FCSM_TA";
+    case FlowTest::FCSM_TR:
+        return "FCSM_TR";
+    case FlowTest::FCSM_MX:
+        return "FCSM_MX";
+    case FlowTest::RLE:
+        return "RLE";
+    case FlowTest::RGT:
+        return "RGT";
+    }
+    return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..ac883da13
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,61 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+namespace Shader::IR {
+
+enum class FlowTest {
+    F,
+    LT,
+    EQ,
+    LE,
+    GT,
+    NE,
+    GE,
+    NUM,
+    NaN,
+    LTU,
+    EQU,
+    LEU,
+    GTU,
+    NEU,
+    GEU,
+    T,
+    OFF,
+    LO,
+    SFF,
+    LS,
+    HI,
+    SFT,
+    HS,
+    OFT,
+    CSM_TA,
+    CSM_TR,
+    CSM_MX,
+    FCSM_TA,
+    FCSM_TR,
+    FCSM_MX,
+    RLE,
+    RGT,
+};
+
+[[nodiscard]] std::string NameOf(FlowTest flow_test);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::FlowTest> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..6450e4b2c
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,533 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_cast.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+[[noreturn]] static void ThrowInvalidType(Type type) {
+    throw InvalidArgument("Invalid type {}", type);
+}
+
+U1 IREmitter::Imm1(bool value) const {
+    return U1{Value{value}};
+}
+
+U8 IREmitter::Imm8(u8 value) const {
+    return U8{Value{value}};
+}
+
+U16 IREmitter::Imm16(u16 value) const {
+    return U16{Value{value}};
+}
+
+U32 IREmitter::Imm32(u32 value) const {
+    return U32{Value{value}};
+}
+
+U32 IREmitter::Imm32(s32 value) const {
+    return U32{Value{static_cast<u32>(value)}};
+}
+
+U32 IREmitter::Imm32(f32 value) const {
+    return U32{Value{Common::BitCast<u32>(value)}};
+}
+
+U64 IREmitter::Imm64(u64 value) const {
+    return U64{Value{value}};
+}
+
+U64 IREmitter::Imm64(f64 value) const {
+    return U64{Value{Common::BitCast<u64>(value)}};
+}
+
+void IREmitter::Branch(IR::Block* label) {
+    Inst(Opcode::Branch, label);
+}
+
+void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) {
+    Inst(Opcode::BranchConditional, cond, true_label, false_label);
+}
+
+void IREmitter::Exit() {
+    Inst(Opcode::Exit);
+}
+
+void IREmitter::Return() {
+    Inst(Opcode::Return);
+}
+
+void IREmitter::Unreachable() {
+    Inst(Opcode::Unreachable);
+}
+
+U32 IREmitter::GetReg(IR::Reg reg) {
+    return Inst<U32>(Opcode::GetRegister, reg);
+}
+
+void IREmitter::SetReg(IR::Reg reg, const U32& value) {
+    Inst(Opcode::SetRegister, reg, value);
+}
+
+U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
+    const U1 value{Inst<U1>(Opcode::GetPred, pred)};
+    if (is_negated) {
+        return Inst<U1>(Opcode::LogicalNot, value);
+    } else {
+        return value;
+    }
+}
+
+void IREmitter::SetPred(IR::Pred pred, const U1& value) {
+    Inst(Opcode::SetPred, pred, value);
+}
+
+U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
+    return Inst<U32>(Opcode::GetCbuf, binding, byte_offset);
+}
+
+U1 IREmitter::GetZFlag() {
+    return Inst<U1>(Opcode::GetZFlag);
+}
+
+U1 IREmitter::GetSFlag() {
+    return Inst<U1>(Opcode::GetSFlag);
+}
+
+U1 IREmitter::GetCFlag() {
+    return Inst<U1>(Opcode::GetCFlag);
+}
+
+U1 IREmitter::GetOFlag() {
+    return Inst<U1>(Opcode::GetOFlag);
+}
+
+void IREmitter::SetZFlag(const U1& value) {
+    Inst(Opcode::SetZFlag, value);
+}
+
+void IREmitter::SetSFlag(const U1& value) {
+    Inst(Opcode::SetSFlag, value);
+}
+
+void IREmitter::SetCFlag(const U1& value) {
+    Inst(Opcode::SetCFlag, value);
+}
+
+void IREmitter::SetOFlag(const U1& value) {
+    Inst(Opcode::SetOFlag, value);
+}
+
+U32 IREmitter::GetAttribute(IR::Attribute attribute) {
+    return Inst<U32>(Opcode::GetAttribute, attribute);
+}
+
+void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
+    Inst(Opcode::SetAttribute, attribute, value);
+}
+
+void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
+    Inst(Opcode::WriteGlobalU8, address, value);
+}
+
+void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
+    Inst(Opcode::WriteGlobalS8, address, value);
+}
+
+void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
+    Inst(Opcode::WriteGlobalU16, address, value);
+}
+
+void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
+    Inst(Opcode::WriteGlobalS16, address, value);
+}
+
+void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
+    Inst(Opcode::WriteGlobal32, address, value);
+}
+
+void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
+    Inst(Opcode::WriteGlobal64, address, vector);
+}
+
+void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
+    Inst(Opcode::WriteGlobal128, address, vector);
+}
+
+U1 IREmitter::GetZeroFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetZeroFromOp, op);
+}
+
+U1 IREmitter::GetSignFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetSignFromOp, op);
+}
+
+U1 IREmitter::GetCarryFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetCarryFromOp, op);
+}
+
+U1 IREmitter::GetOverflowFromOp(const Value& op) {
+    return Inst<U1>(Opcode::GetOverflowFromOp, op);
+}
+
+U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPAdd16, a, b);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPAdd32, a, b);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPAdd64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
+    if (e1.Type() != e2.Type()) {
+        throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type());
+    }
+    return Inst(Opcode::CompositeConstruct2, e1, e2);
+}
+
+Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
+    if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
+        throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type());
+    }
+    return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
+}
+
+Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
+                                    const UAny& e4) {
+    if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
+        throw InvalidArgument("Incompatible types {} {} {} {}", e1.Type(), e2.Type(), e3.Type(),
+                              e4.Type());
+    }
+    return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
+}
+
+UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
+    if (element >= 4) {
+        throw InvalidArgument("Out of bounds element {}", element);
+    }
+    return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
+}
+
+U64 IREmitter::PackUint2x32(const Value& vector) {
+    return Inst<U64>(Opcode::PackUint2x32, vector);
+}
+
+Value IREmitter::UnpackUint2x32(const U64& value) {
+    return Inst(Opcode::UnpackUint2x32, value);
+}
+
+U32 IREmitter::PackFloat2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackFloat2x16, vector);
+}
+
+Value IREmitter::UnpackFloat2x16(const U32& value) {
+    return Inst(Opcode::UnpackFloat2x16, value);
+}
+
+U64 IREmitter::PackDouble2x32(const Value& vector) {
+    return Inst<U64>(Opcode::PackDouble2x32, vector);
+}
+
+Value IREmitter::UnpackDouble2x32(const U64& value) {
+    return Inst(Opcode::UnpackDouble2x32, value);
+}
+
+U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPMul16, a, b);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPMul32, a, b);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPMul64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPAbs(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPAbs16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPAbs32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPAbs64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPNeg(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPNeg16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPNeg32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPNeg64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) {
+    U16U32U64 result{value};
+    if (abs) {
+        result = FPAbs(value);
+    }
+    if (neg) {
+        result = FPNeg(value);
+    }
+    return result;
+}
+
+U32 IREmitter::FPCosNotReduced(const U32& value) {
+    return Inst<U32>(Opcode::FPCosNotReduced, value);
+}
+
+U32 IREmitter::FPExp2NotReduced(const U32& value) {
+    return Inst<U32>(Opcode::FPExp2NotReduced, value);
+}
+
+U32 IREmitter::FPLog2(const U32& value) {
+    return Inst<U32>(Opcode::FPLog2, value);
+}
+
+U32U64 IREmitter::FPRecip(const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32U64>(Opcode::FPRecip32, value);
+    case Type::U64:
+        return Inst<U32U64>(Opcode::FPRecip64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32U64 IREmitter::FPRecipSqrt(const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32U64>(Opcode::FPRecipSqrt32, value);
+    case Type::U64:
+        return Inst<U32U64>(Opcode::FPRecipSqrt64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U32 IREmitter::FPSinNotReduced(const U32& value) {
+    return Inst<U32>(Opcode::FPSinNotReduced, value);
+}
+
+U32 IREmitter::FPSqrt(const U32& value) {
+    return Inst<U32>(Opcode::FPSqrt, value);
+}
+
+U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPSaturate16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPSaturate32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPSaturate64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPRoundEven16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPRoundEven32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPRoundEven64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPFloor(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPFloor16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPFloor32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPFloor64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPCeil(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPCeil16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPCeil32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPCeil64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
+    switch (value.Type()) {
+    case Type::U16:
+        return Inst<U16U32U64>(Opcode::FPTrunc16, value);
+    case Type::U32:
+        return Inst<U16U32U64>(Opcode::FPTrunc32, value);
+    case Type::U64:
+        return Inst<U16U32U64>(Opcode::FPTrunc64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
+}
+
+U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
+    return Inst<U1>(Opcode::LogicalOr, a, b);
+}
+
+U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
+    return Inst<U1>(Opcode::LogicalAnd, a, b);
+}
+
+U1 IREmitter::LogicalNot(const U1& value) {
+    return Inst<U1>(Opcode::LogicalNot, value);
+}
+
+U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) {
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertS16F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertS16F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertS16F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertS32F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertS32F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertS32F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertS64F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertS64F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertS64F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) {
+    switch (bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertU16F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertU16F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertU16F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 32:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertU32F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertU32F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertU32F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    case 64:
+        switch (value.Type()) {
+        case Type::U16:
+            return Inst<U32U64>(Opcode::ConvertU64F16, value);
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertU64F32, value);
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertU64F64, value);
+        default:
+            ThrowInvalidType(value.Type());
+        }
+    default:
+        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+    }
+}
+
+U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) {
+    if (is_signed) {
+        return ConvertFToS(bitsize, value);
+    } else {
+        return ConvertFToU(bitsize, value);
+    }
+}
+
+U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
+    switch (bitsize) {
+    case 32:
+        switch (value.Type()) {
+        case Type::U32:
+            // Nothing to do
+            return value;
+        case Type::U64:
+            return Inst<U32U64>(Opcode::ConvertU32U64, value);
+        default:
+            break;
+        }
+        break;
+    case 64:
+        switch (value.Type()) {
+        case Type::U32:
+            return Inst<U32U64>(Opcode::ConvertU64U32, value);
+        case Type::U64:
+            // Nothing to do
+            return value;
+        default:
+            break;
+        }
+    }
+    throw NotImplementedException("Conversion from {} to {}
bits", value.Type(), bitsize); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..1af79f41c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -0,0 +1,123 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class IREmitter { +public: + explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + + Block& block; + + [[nodiscard]] U1 Imm1(bool value) const; + [[nodiscard]] U8 Imm8(u8 value) const; + [[nodiscard]] U16 Imm16(u16 value) const; + [[nodiscard]] U32 Imm32(u32 value) const; + [[nodiscard]] U32 Imm32(s32 value) const; + [[nodiscard]] U32 Imm32(f32 value) const; + [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(f64 value) const; + + void Branch(IR::Block* label); + void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); + void Exit(); + void Return(); + void Unreachable(); + + [[nodiscard]] U32 GetReg(IR::Reg reg); + void SetReg(IR::Reg reg, const U32& value); + + [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); + void SetPred(IR::Pred pred, const U1& value); + + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + + [[nodiscard]] U1 GetZFlag(); + [[nodiscard]] U1 GetSFlag(); + [[nodiscard]] U1 GetCFlag(); + [[nodiscard]] U1 GetOFlag(); + + void SetZFlag(const U1& value); + void SetSFlag(const U1& value); + void SetCFlag(const U1& value); + void SetOFlag(const U1& value); + + [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); + void SetAttribute(IR::Attribute attribute, const U32& value); + + void WriteGlobalU8(const U64& address, const U32& value); + void WriteGlobalS8(const U64& address, const U32& value); + void WriteGlobalU16(const U64& address, const U32& value); + void WriteGlobalS16(const U64& address, const U32& value); + void WriteGlobal32(const U64& address, const U32& value); + void WriteGlobal64(const U64& address, const IR::Value& vector); + void WriteGlobal128(const U64& address, const IR::Value& vector); + + [[nodiscard]] U1 GetZeroFromOp(const Value& op); + [[nodiscard]] U1 GetSignFromOp(const Value& op); + [[nodiscard]] U1 GetCarryFromOp(const Value& op); + [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2); + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3); + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, + const UAny& e4); + [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + + [[nodiscard]] U64 PackUint2x32(const Value& vector); + [[nodiscard]] Value UnpackUint2x32(const U64& value); + + [[nodiscard]] U32 PackFloat2x16(const Value& vector); + [[nodiscard]] Value UnpackFloat2x16(const U32& value); + + [[nodiscard]] U64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const U64& value); + + [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); + [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); + + [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); + [[nodiscard]] U16U32U64 
FPNeg(const U16U32U64& value);
+    [[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg);
+
+    [[nodiscard]] U32 FPCosNotReduced(const U32& value);
+    [[nodiscard]] U32 FPExp2NotReduced(const U32& value);
+    [[nodiscard]] U32 FPLog2(const U32& value);
+    [[nodiscard]] U32U64 FPRecip(const U32U64& value);
+    [[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value);
+    [[nodiscard]] U32 FPSinNotReduced(const U32& value);
+    [[nodiscard]] U32 FPSqrt(const U32& value);
+    [[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value);
+    [[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value);
+    [[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value);
+    [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
+    [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
+
+    [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
+    [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
+    [[nodiscard]] U1 LogicalNot(const U1& value);
+
+    [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
+    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value);
+    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value);
+
+    [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value);
+
+private:
+    IR::Block::iterator insertion_point;
+
+    template <typename T = Value, typename... Args>
+    T Inst(Opcode op, Args... args) {
+        auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
+        return T{Value{&*it}};
+    }
+};
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..553fec3b7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,189 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { + if (inst && inst->Opcode() != opcode) { + throw LogicError("Invalid pseudo-instruction"); + } +} + +static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { + if (dest_inst) { + throw LogicError("Only one of each type of pseudo-op allowed"); + } + dest_inst = pseudo_inst; +} + +static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { + if (inst->Opcode() != expected_opcode) { + throw LogicError("Undoing use of invalid pseudo-op"); + } + inst = nullptr; +} + +bool Inst::MayHaveSideEffects() const noexcept { + switch (op) { + case Opcode::SetAttribute: + case Opcode::SetAttributeIndexed: + case Opcode::WriteGlobalU8: + case Opcode::WriteGlobalS8: + case Opcode::WriteGlobalU16: + case Opcode::WriteGlobalS16: + case Opcode::WriteGlobal32: + case Opcode::WriteGlobal64: + case Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + +bool Inst::IsPseudoInstruction() const noexcept { + switch (op) { + case Opcode::GetZeroFromOp: + case Opcode::GetSignFromOp: + case Opcode::GetCarryFromOp: + case Opcode::GetOverflowFromOp: + case Opcode::GetZSCOFromOp: + return true; + default: + return false; + } +} + +bool Inst::HasAssociatedPseudoOperation() const noexcept { + return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst; +} + +Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { + // This is faster than doing a search through the block. + switch (opcode) { + case Opcode::GetZeroFromOp: + CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); + return zero_inst; + case Opcode::GetSignFromOp: + CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); + return sign_inst; + case Opcode::GetCarryFromOp: + CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); + return carry_inst; + case Opcode::GetOverflowFromOp: + CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); + return overflow_inst; + case Opcode::GetZSCOFromOp: + CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp); + return zsco_inst; + default: + throw InvalidArgument("{} is not a pseudo-instruction", opcode); + } +} + +size_t Inst::NumArgs() const { + return NumArgsOf(op); +} + +IR::Type Inst::Type() const { + return TypeOf(op); +} + +Value Inst::Arg(size_t index) const { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + return args[index]; +} + +void Inst::SetArg(size_t index, Value value) { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + if (!args[index].IsImmediate()) { + UndoUse(args[index]); + } + if (!value.IsImmediate()) { + Use(value); + } + args[index] = value; +} + +void Inst::Invalidate() { + ClearArgs(); + op = Opcode::Void; +} + +void Inst::ClearArgs() { + for (auto& value : args) { + if (!value.IsImmediate()) { + UndoUse(value); + } + value = {}; + } +} + +void Inst::ReplaceUsesWith(Value replacement) { + Invalidate(); + + op = Opcode::Identity; + + if (!replacement.IsImmediate()) { + Use(replacement); + } + args[0] = replacement; +} + +void Inst::Use(const Value& value) { + ++value.Inst()->use_count; + + switch (op) { + case Opcode::GetZeroFromOp: + SetPseudoInstruction(value.Inst()->zero_inst, this); + break; + 
+    case Opcode::GetSignFromOp:
+        SetPseudoInstruction(value.Inst()->sign_inst, this);
+        break;
+    case Opcode::GetCarryFromOp:
+        SetPseudoInstruction(value.Inst()->carry_inst, this);
+        break;
+    case Opcode::GetOverflowFromOp:
+        SetPseudoInstruction(value.Inst()->overflow_inst, this);
+        break;
+    case Opcode::GetZSCOFromOp:
+        SetPseudoInstruction(value.Inst()->zsco_inst, this);
+        break;
+    default:
+        break;
+    }
+}
+
+void Inst::UndoUse(const Value& value) {
+    --value.Inst()->use_count;
+
+    switch (op) {
+    case Opcode::GetZeroFromOp:
+        RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp);
+        break;
+    case Opcode::GetSignFromOp:
+        RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp);
+        break;
+    case Opcode::GetCarryFromOp:
+        RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp);
+        break;
+    case Opcode::GetOverflowFromOp:
+        RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
+        break;
+    case Opcode::GetZSCOFromOp:
+        RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
new file mode 100644
index 000000000..43460b950
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -0,0 +1,82 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include <boost/intrusive/list.hpp>
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/opcode.h"
+#include "shader_recompiler/frontend/ir/type.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+constexpr size_t MAX_ARG_COUNT = 4;
+
+class Inst : public boost::intrusive::list_base_hook<> {
+public:
+    explicit Inst(Opcode op_) noexcept : op(op_) {}
+
+    /// Get the number of uses this instruction has.
+    [[nodiscard]] int UseCount() const noexcept {
+        return use_count;
+    }
+
+    /// Determines whether this instruction has uses or not.
+    [[nodiscard]] bool HasUses() const noexcept {
+        return use_count > 0;
+    }
+
+    /// Get the opcode this microinstruction represents.
+    [[nodiscard]] IR::Opcode Opcode() const noexcept {
+        return op;
+    }
+
+    /// Determines whether or not this instruction may have side effects.
+    [[nodiscard]] bool MayHaveSideEffects() const noexcept;
+
+    /// Determines whether or not this instruction is a pseudo-instruction.
+    /// Pseudo-instructions depend on their parent instructions for their semantics.
+    [[nodiscard]] bool IsPseudoInstruction() const noexcept;
+
+    /// Determines if there is a pseudo-operation associated with this instruction.
+    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
+    /// Gets a pseudo-operation associated with this instruction
+    [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
+
+    /// Get the number of arguments this instruction has.
+    [[nodiscard]] size_t NumArgs() const;
+
+    /// Get the type this instruction returns.
+    [[nodiscard]] IR::Type Type() const;
+
+    /// Get the value of a given argument index.
+    [[nodiscard]] Value Arg(size_t index) const;
+    /// Set the value of a given argument index.
+    void SetArg(size_t index, Value value);
+
+    void Invalidate();
+    void ClearArgs();
+
+    void ReplaceUsesWith(Value replacement);
+
+private:
+    void Use(const Value& value);
+    void UndoUse(const Value& value);
+
+    IR::Opcode op{};
+    int use_count{};
+    std::array<Value, MAX_ARG_COUNT> args{};
+    Inst* zero_inst{};
+    Inst* sign_inst{};
+    Inst* carry_inst{};
+    Inst* overflow_inst{};
+    Inst* zsco_inst{};
+    u64 flags{};
+};
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcode.cpp b/src/shader_recompiler/frontend/ir/opcode.cpp
new file mode 100644
index 000000000..65d074029
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcode.cpp
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <string_view>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/opcode.h"
+
+namespace Shader::IR {
+namespace {
+struct OpcodeMeta {
+    std::string_view name;
+    Type type;
+    std::array<Type, 4> arg_types;
+};
+
+using enum Type;
+
+constexpr std::array META_TABLE{
+#define OPCODE(name_token, type_token, ...)                                                        \
+    OpcodeMeta{                                                                                    \
+        .name{#name_token},                                                                        \
+        .type{type_token},                                                                         \
+        .arg_types{__VA_ARGS__},                                                                   \
+    },
+#include "opcode.inc"
+#undef OPCODE
+};
+
+void ValidateOpcode(Opcode op) {
+    const size_t raw{static_cast<size_t>(op)};
+    if (raw >= META_TABLE.size()) {
+        throw InvalidArgument("Invalid opcode with raw value {}", raw);
+    }
+}
+} // Anonymous namespace
+
+Type TypeOf(Opcode op) {
+    ValidateOpcode(op);
+    return META_TABLE[static_cast<size_t>(op)].type;
+}
+
+size_t NumArgsOf(Opcode op) {
+    ValidateOpcode(op);
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))};
+    return static_cast<size_t>(distance);
+}
+
+Type ArgTypeOf(Opcode op, size_t arg_index) {
+    ValidateOpcode(op);
+    const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
+    if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) {
+        throw InvalidArgument("Out of bounds argument");
+    }
+    return arg_types[arg_index];
+}
+
+std::string_view NameOf(Opcode op) {
+    ValidateOpcode(op);
+    return META_TABLE[static_cast<size_t>(op)].name;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcode.h b/src/shader_recompiler/frontend/ir/opcode.h
new file mode 100644
index 000000000..1f4440379
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcode.h
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+enum class Opcode {
+#define OPCODE(name, ...) name,
+#include "opcode.inc"
+#undef OPCODE
+};
+
+/// Get return type of an opcode
+[[nodiscard]] Type TypeOf(Opcode op);
+
+/// Get the number of arguments an opcode accepts
+[[nodiscard]] size_t NumArgsOf(Opcode op);
+
+/// Get the required type of an argument of an opcode
+[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index);
+
+/// Get the name of an opcode
+[[nodiscard]] std::string_view NameOf(Opcode op);
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Opcode> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
+        return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
new file mode 100644
index 000000000..371064bf3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -0,0 +1,142 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
+OPCODE(Void,                Void,                                       )
+OPCODE(Identity,            Opaque, Opaque,                             )
+
+// Control flow
+OPCODE(Branch,              Void,   Label,                              )
+OPCODE(BranchConditional,   Void,   U1,     Label,  Label,              )
+OPCODE(Exit,                Void,                                       )
+OPCODE(Return,              Void,                                       )
+OPCODE(Unreachable,         Void,                                       )
+
+// Context getters/setters
+OPCODE(GetRegister,         U32,    Reg,                                )
+OPCODE(SetRegister,         Void,   Reg,    U32,                        )
+OPCODE(GetPred,             U1,     Pred,                               )
+OPCODE(SetPred,             Void,   Pred,   U1,                         )
+OPCODE(GetCbuf,             U32,    U32,    U32,                        )
+OPCODE(GetAttribute,        U32,    Attribute,                          )
+OPCODE(SetAttribute,        Void,   Attribute, U32,                     )
+OPCODE(GetAttributeIndexed, U32,    U32,                                )
+OPCODE(SetAttributeIndexed, Void,   U32,    U32,                        )
+OPCODE(GetZSCORaw,          U32,                                        )
+OPCODE(SetZSCORaw,          Void,   U32,                                )
+OPCODE(SetZSCO,             Void,   ZSCO,                               )
+OPCODE(GetZFlag,            U1,     Void,                               )
+OPCODE(GetSFlag,            U1,     Void,                               )
+OPCODE(GetCFlag,            U1,     Void,                               )
+OPCODE(GetOFlag,            U1,     Void,                               )
+OPCODE(SetZFlag,            Void,   U1,                                 )
+OPCODE(SetSFlag,            Void,   U1,                                 )
+OPCODE(SetCFlag,            Void,   U1,                                 )
+OPCODE(SetOFlag,            Void,   U1,                                 )
+
+// Memory operations
+OPCODE(WriteGlobalU8,       Void,   U64,    U32,                        )
+OPCODE(WriteGlobalS8,       Void,   U64,    U32,                        )
+OPCODE(WriteGlobalU16,      Void,   U64,    U32,                        )
+OPCODE(WriteGlobalS16,      Void,   U64,    U32,                        )
+OPCODE(WriteGlobal32,       Void,   U64,    U32,                        )
+OPCODE(WriteGlobal64,       Void,   U64,    Opaque,                     )
+OPCODE(WriteGlobal128,      Void,   U64,    Opaque,                     )
+
+// Vector utility
+OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque,                     )
+OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque,             )
+OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque,     )
+OPCODE(CompositeExtract,    Opaque, Opaque, U32,                        )
+
+// Bitwise conversions
+OPCODE(PackUint2x32,        U64,    Opaque,                             )
+OPCODE(UnpackUint2x32,      Opaque, U64,                                )
+OPCODE(PackFloat2x16,       U32,    Opaque,                             )
+OPCODE(UnpackFloat2x16,     Opaque, U32,                                )
+OPCODE(PackDouble2x32,      U64,    Opaque,                             )
+OPCODE(UnpackDouble2x32,    Opaque, U64,                                )
+
+// Pseudo-operation, handled specially at final emit
+OPCODE(GetZeroFromOp,       U1,     Opaque,                             )
+OPCODE(GetSignFromOp,       U1,     Opaque,                             )
+OPCODE(GetCarryFromOp,      U1,     Opaque,                             )
+OPCODE(GetOverflowFromOp,   U1,     Opaque,                             )
+OPCODE(GetZSCOFromOp,       ZSCO,   Opaque,                             )
+
+// Floating-point operations
+OPCODE(FPAbs16,             U16,    U16,                                )
+OPCODE(FPAbs32,             U32,    U32,                                )
+OPCODE(FPAbs64,             U64,    U64,                                )
+OPCODE(FPAdd16,             U16,    U16,    U16,                        )
+OPCODE(FPAdd32,             U32,    U32,    U32,                        )
+OPCODE(FPAdd64,             U64,    U64,    U64,                        )
+OPCODE(FPFma16,             U16,    U16,    U16,    U16,                )
+OPCODE(FPFma32,             U32,    U32,    U32,    U32,                )
+OPCODE(FPFma64,             U64,    U64,    U64,    U64,                )
+OPCODE(FPMax32,             U32,    U32,    U32,                        )
+OPCODE(FPMax64,             U64,    U64,    U64,                        )
+OPCODE(FPMin32,             U32,    U32,    U32,                        )
+OPCODE(FPMin64,             U64,    U64,    U64,                        )
+OPCODE(FPMul16,             U16,    U16,    U16,                        )
+OPCODE(FPMul32,             U32,    U32,    U32,                        )
+OPCODE(FPMul64,             U64,    U64,    U64,                        )
+OPCODE(FPNeg16,             U16,    U16,                                )
+OPCODE(FPNeg32,             U32,    U32,                                )
+OPCODE(FPNeg64,             U64,    U64,                                )
+OPCODE(FPRecip32,           U32,    U32,                                )
+OPCODE(FPRecip64,           U64,    U64,                                )
+OPCODE(FPRecipSqrt32,       U32,    U32,                                )
+OPCODE(FPRecipSqrt64,       U64,    U64,                                )
+OPCODE(FPSqrt,              U32,    U32,                                )
+OPCODE(FPSin,               U32,    U32,                                )
+OPCODE(FPSinNotReduced,     U32,    U32,                                )
+OPCODE(FPExp2,              U32,    U32,                                )
+OPCODE(FPExp2NotReduced,    U32,    U32,                                )
+OPCODE(FPCos,               U32,    U32,                                )
+OPCODE(FPCosNotReduced,     U32,    U32,                                )
+OPCODE(FPLog2,              U32,    U32,                                )
+OPCODE(FPSaturate16,        U16,    U16,                                )
+OPCODE(FPSaturate32,        U32,    U32,                                )
+OPCODE(FPSaturate64,        U64,    U64,                                )
+OPCODE(FPRoundEven16,       U16,    U16,                                )
+OPCODE(FPRoundEven32,       U32,    U32,                                )
+OPCODE(FPRoundEven64,       U64,    U64,                                )
+OPCODE(FPFloor16,           U16,    U16,                                )
+OPCODE(FPFloor32,           U32,    U32,                                )
+OPCODE(FPFloor64,           U64,    U64,                                )
+OPCODE(FPCeil16,            U16,    U16,                                )
+OPCODE(FPCeil32,            U32,    U32,                                )
+OPCODE(FPCeil64,            U64,    U64,                                )
+OPCODE(FPTrunc16,           U16,    U16,                                )
+OPCODE(FPTrunc32,           U32,    U32,                                )
+OPCODE(FPTrunc64,           U64,    U64,                                )
+
+// Logical operations
+OPCODE(LogicalOr,           U1,     U1,     U1,                         )
+OPCODE(LogicalAnd,          U1,     U1,     U1,                         )
+OPCODE(LogicalNot,          U1,     U1,                                 )
+
+// Conversion operations
+OPCODE(ConvertS16F16,       U32,    U16,                                )
+OPCODE(ConvertS16F32,       U32,    U32,                                )
+OPCODE(ConvertS16F64,       U32,    U64,                                )
+OPCODE(ConvertS32F16,       U32,    U16,                                )
+OPCODE(ConvertS32F32,       U32,    U32,                                )
+OPCODE(ConvertS32F64,       U32,    U64,                                )
+OPCODE(ConvertS64F16,       U64,    U16,                                )
+OPCODE(ConvertS64F32,       U64,    U32,                                )
+OPCODE(ConvertS64F64,       U64,    U64,                                )
+OPCODE(ConvertU16F16,       U32,    U16,                                )
+OPCODE(ConvertU16F32,       U32,    U32,                                )
+OPCODE(ConvertU16F64,       U32,    U64,                                )
+OPCODE(ConvertU32F16,       U32,    U16,                                )
+OPCODE(ConvertU32F32,       U32,    U32,                                )
+OPCODE(ConvertU32F64,       U32,    U64,                                )
+OPCODE(ConvertU64F16,       U64,    U16,                                )
+OPCODE(ConvertU64F32,       U64,    U32,                                )
+OPCODE(ConvertU64F64,       U64,    U64,                                )
+
+OPCODE(ConvertU64U32,       U64,    U32,                                )
+OPCODE(ConvertU32U64,       U32,    U64,                                )
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..37cc53006
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,28 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+namespace Shader::IR {
+
+enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT };
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Pred> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
+        if (pred == Shader::IR::Pred::PT) {
+            return fmt::format_to(ctx.out(), "PT");
+        } else {
+            return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
+        }
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..316fc4be8
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,314 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Reg : u64 {
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26,
+    R27,
+    R28,
+    R29,
+    R30,
+    R31,
+    R32,
+    R33,
+    R34,
+    R35,
+    R36,
+    R37,
+    R38,
+    R39,
+    R40,
+    R41,
+    R42,
+    R43,
+    R44,
+    R45,
+    R46,
+    R47,
+    R48,
+    R49,
+    R50,
+    R51,
+    R52,
+    R53,
+    R54,
+    R55,
+    R56,
+    R57,
+    R58,
+    R59,
+    R60,
+    R61,
+    R62,
+    R63,
+    R64,
+    R65,
+    R66,
+    R67,
+    R68,
+    R69,
+    R70,
+    R71,
+    R72,
+    R73,
+    R74,
+    R75,
+    R76,
+    R77,
+    R78,
+    R79,
+    R80,
+    R81,
+    R82,
+    R83,
+    R84,
+    R85,
+    R86,
+    R87,
+    R88,
+    R89,
+    R90,
+    R91,
+    R92,
+    R93,
+    R94,
+    R95,
+    R96,
+    R97,
+    R98,
+    R99,
+    R100,
+    R101,
+    R102,
+    R103,
+    R104,
+    R105,
+    R106,
+    R107,
+    R108,
+    R109,
+    R110,
+    R111,
+    R112,
+    R113,
+    R114,
+    R115,
+    R116,
+    R117,
+    R118,
+    R119,
+    R120,
+    R121,
+    R122,
+    R123,
+    R124,
+    R125,
+    R126,
+    R127,
+    R128,
+    R129,
+    R130,
+    R131,
+    R132,
+    R133,
+    R134,
+    R135,
+    R136,
+    R137,
+    R138,
+    R139,
+    R140,
+    R141,
+    R142,
+    R143,
+    R144,
+    R145,
+    R146,
+    R147,
+    R148,
+    R149,
+    R150,
+    R151,
+    R152,
+    R153,
+    R154,
+    R155,
+    R156,
+    R157,
+    R158,
+    R159,
+    R160,
+    R161,
+    R162,
+    R163,
+    R164,
+    R165,
+    R166,
+    R167,
+    R168,
+    R169,
+    R170,
+    R171,
+    R172,
+    R173,
+    R174,
+    R175,
+    R176,
+    R177,
+    R178,
+    R179,
+    R180,
+    R181,
+    R182,
+    R183,
+    R184,
+    R185,
+    R186,
+    R187,
+    R188,
+    R189,
+    R190,
+    R191,
+    R192,
+    R193,
+    R194,
+    R195,
+    R196,
+    R197,
+    R198,
+    R199,
+    R200,
+    R201,
+    R202,
+    R203,
+    R204,
+    R205,
+    R206,
+    R207,
+    R208,
+    R209,
+    R210,
+    R211,
+    R212,
+    R213,
+    R214,
+    R215,
+    R216,
+    R217,
+    R218,
+    R219,
+    R220,
+    R221,
+    R222,
+    R223,
+    R224,
+    R225,
+    R226,
+    R227,
+    R228,
+    R229,
+    R230,
+    R231,
+    R232,
+    R233,
+    R234,
+    R235,
+    R236,
+    R237,
+    R238,
+    R239,
+    R240,
+    R241,
+    R242,
+    R243,
+    R244,
+    R245,
+    R246,
+    R247,
+    R248,
+    R249,
+    R250,
+    R251,
+    R252,
+    R253,
+    R254,
+    RZ,
+};
+static_assert(static_cast<int>(Reg::RZ) == 255);
+
+[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
+    if (reg == Reg::RZ) {
+        // Adding or subtracting registers from RZ yields RZ
+        return Reg::RZ;
+    }
+    const int result{static_cast<int>(reg) + num};
+    if (result >= static_cast<int>(Reg::RZ)) {
+        throw LogicError("Overflow on register arithmetic");
+    }
+    if (result < 0) {
+        throw LogicError("Underflow on register arithmetic");
+    }
+    return static_cast<Reg>(result);
+}
+
+[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
+    return reg + (-num);
+}
+
+[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
+    return (static_cast<size_t>(reg) / align) * align == static_cast<size_t>(reg);
+}
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Reg> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
+        if (reg == Shader::IR::Reg::RZ) {
+            return fmt::format_to(ctx.out(), "RZ");
+        } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
+            return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
+        } else {
+            throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
+        }
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..da1e2a0f6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,36 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <string>
+
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+std::string NameOf(Type type) {
+    static constexpr std::array names{
+        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO",
+    };
+    const size_t bits{static_cast<size_t>(type)};
+    if (bits == 0) {
+        return "Void";
+    }
+    std::string result;
+    for (size_t i = 0; i < names.size(); i++) {
+        if ((bits & (size_t{1} << i)) != 0) {
+            if (!result.empty()) {
+                result += '|';
+            }
+            result += names[i];
+        }
+    }
+    return result;
+}
+
+bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
+    return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..f753628e8
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,47 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <fmt/format.h>
+
+#include "common/common_funcs.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::IR {
+
+enum class Type {
+    Void = 0,
+    Opaque = 1 << 0,
+    Label = 1 << 1,
+    Reg = 1 << 2,
+    Pred = 1 << 3,
+    Attribute = 1 << 4,
+    U1 = 1 << 5,
+    U8 = 1 << 6,
+    U16 = 1 << 7,
+    U32 = 1 << 8,
+    U64 = 1 << 9,
+    ZSCO = 1 << 10,
+};
+DECLARE_ENUM_FLAG_OPERATORS(Type)
+
+[[nodiscard]] std::string NameOf(Type type);
+
+[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
+
+} // namespace Shader::IR
+
+template <>
+struct fmt::formatter<Shader::IR::Type> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::IR::Type& type, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{}", NameOf(type));
+    }
+};
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..7b5b35d6c
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,124 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+ +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} + +Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {} + +Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} + +Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} + +Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} + +Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} + +Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} + +Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} + +Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} + +Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} + +bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->Opcode() == Opcode::Identity; +} + +bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +bool Value::IsImmediate() const noexcept { + if (IsIdentity()) { + return inst->Arg(0).IsImmediate(); + } + return type != Type::Opaque; +} + +bool Value::IsLabel() const noexcept { + return type == Type::Label; +} + +IR::Type Value::Type() const noexcept { + if (IsIdentity()) { + return inst->Arg(0).Type(); + } + if (type == Type::Opaque) { + return inst->Type(); + } + return type; +} + +IR::Inst* Value::Inst() const { + ValidateAccess(Type::Opaque); + return inst; +} + +IR::Block* Value::Label() const { + ValidateAccess(Type::Label); + return label; +} + +IR::Inst* Value::InstRecursive() const { + ValidateAccess(Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +IR::Reg Value::Reg() const { + ValidateAccess(Type::Reg); + return reg; +} + +IR::Pred Value::Pred() const { + ValidateAccess(Type::Pred); + return pred; +} + +IR::Attribute Value::Attribute() const { + ValidateAccess(Type::Attribute); + return attribute; +} + +bool Value::U1() const { + ValidateAccess(Type::U1); + return imm_u1; +} + +u8 Value::U8() const { + ValidateAccess(Type::U8); + return imm_u8; +} + +u16 Value::U16() const { + ValidateAccess(Type::U16); + return imm_u16; +} + +u32 Value::U32() const { + ValidateAccess(Type::U32); + return imm_u32; +} + +u64 Value::U64() const { + ValidateAccess(Type::U64); + return imm_u64; +} + +void Value::ValidateAccess(IR::Type expected) const { + if (type != expected) { + throw LogicError("Reading {} out of {}", expected, type); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..664dacf9d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/attribute.h"
+#include "shader_recompiler/frontend/ir/pred.h"
+#include "shader_recompiler/frontend/ir/reg.h"
+#include "shader_recompiler/frontend/ir/type.h"
+
+namespace Shader::IR {
+
+class Block;
+class Inst;
+
+class Value {
+public:
+    Value() noexcept : type{IR::Type::Void}, inst{nullptr} {}
+    explicit Value(IR::Inst* value) noexcept;
+    explicit Value(IR::Block* value) noexcept;
+    explicit Value(IR::Reg value) noexcept;
+    explicit Value(IR::Pred value) noexcept;
+    explicit Value(IR::Attribute value) noexcept;
+    explicit Value(bool value) noexcept;
+    explicit Value(u8 value) noexcept;
+    explicit Value(u16 value) noexcept;
+    explicit Value(u32 value) noexcept;
+    explicit Value(u64 value) noexcept;
+
+    [[nodiscard]] bool IsIdentity() const noexcept;
+    [[nodiscard]] bool IsEmpty() const noexcept;
+    [[nodiscard]] bool IsImmediate() const noexcept;
+    [[nodiscard]] bool IsLabel() const noexcept;
+    [[nodiscard]] IR::Type Type() const noexcept;
+
+    [[nodiscard]] IR::Inst* Inst() const;
+    [[nodiscard]] IR::Block* Label() const;
+    [[nodiscard]] IR::Inst* InstRecursive() const;
+    [[nodiscard]] IR::Reg Reg() const;
+    [[nodiscard]] IR::Pred Pred() const;
+    [[nodiscard]] IR::Attribute Attribute() const;
+    [[nodiscard]] bool U1() const;
+    [[nodiscard]] u8 U8() const;
+    [[nodiscard]] u16 U16() const;
+    [[nodiscard]] u32 U32() const;
+    [[nodiscard]] u64 U64() const;
+
+private:
+    void ValidateAccess(IR::Type expected) const;
+
+    IR::Type type;
+    union {
+        IR::Inst* inst;
+        IR::Block* label;
+        IR::Reg reg;
+        IR::Pred pred;
+        IR::Attribute attribute;
+        bool imm_u1;
+        u8 imm_u8;
+        u16 imm_u16;
+        u32 imm_u32;
+        u64 imm_u64;
+    };
+};
+
+template <IR::Type type_>
+class TypedValue : public Value {
+public:
+    TypedValue() = default;
+
+    template <IR::Type other_type>
+    requires((other_type & type_) != IR::Type::Void) explicit(false)
+        TypedValue(const TypedValue<other_type>& value) : Value(value) {}
+
+    explicit TypedValue(const Value& value) : Value(value) {
+        if ((value.Type() & type_) == IR::Type::Void) {
+            throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
+        }
+    }
+
+    explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {}
+};
+
+using U1 = TypedValue<Type::U1>;
+using U8 = TypedValue<Type::U8>;
+using U16 = TypedValue<Type::U16>;
+using U32 = TypedValue<Type::U32>;
+using U64 = TypedValue<Type::U64>;
+using U32U64 = TypedValue<Type::U32 | Type::U64>;
+using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
+using UAny = TypedValue<Type::U1 | Type::U8 | Type::U16 | Type::U32 | Type::U64>;
+using ZSCO = TypedValue<Type::ZSCO>;
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..fc4dba826
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,531 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <optional>
+#include <ranges>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell::Flow {
+
+static u32 BranchOffset(Location pc, Instruction inst) {
+    return pc.Offset() + inst.branch.Offset() + 8;
+}
+
+static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
+    if (pc <= block.begin || pc >= block.end) {
+        throw InvalidArgument("Invalid address to split={}", pc);
+    }
+    return {
+        Block{
+            .begin{block.begin},
+            .end{pc},
+            .end_class{EndClass::Branch},
+            .id{block.id},
+            .stack{block.stack},
+            .cond{true},
+            .branch_true{new_id},
+            .branch_false{UNREACHABLE_BLOCK_ID},
+        },
+        Block{
+            .begin{pc},
+            .end{block.end},
+            .end_class{block.end_class},
+            .id{new_id},
+            .stack{std::move(block.stack)},
+            .cond{block.cond},
+            .branch_true{block.branch_true},
+            .branch_false{block.branch_false},
+        },
+    };
+}
+
+static Token OpcodeToken(Opcode opcode) {
+    switch (opcode) {
+    case Opcode::PBK:
+    case Opcode::BRK:
+        return Token::PBK;
+    case Opcode::PCNT:
+    case Opcode::CONT:
+        return Token::PCNT;
+    case Opcode::PEXIT:
+    case Opcode::EXIT:
+        return Token::PEXIT;
+    case Opcode::PLONGJMP:
+    case Opcode::LONGJMP:
+        return Token::PLONGJMP;
+    case Opcode::PRET:
+    case Opcode::RET:
+    case Opcode::CAL:
+        return Token::PRET;
+    case Opcode::SSY:
+    case Opcode::SYNC:
+        return Token::SSY;
+    default:
+        throw InvalidArgument("{}", opcode);
+    }
+}
+
+static bool IsAbsoluteJump(Opcode opcode) {
+    switch (opcode) {
+    case Opcode::JCAL:
+    case Opcode::JMP:
+    case Opcode::JMX:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool HasFlowTest(Opcode opcode) {
+    switch (opcode) {
+    case Opcode::BRA:
+    case Opcode::BRX:
+    case Opcode::EXIT:
+    case Opcode::JMP:
+    case Opcode::JMX:
+    case Opcode::BRK:
+    case Opcode::CONT:
+    case Opcode::LONGJMP:
+    case Opcode::RET:
+    case Opcode::SYNC:
+        return true;
+    case Opcode::CAL:
+    case Opcode::JCAL:
+        return false;
+    default:
+        throw InvalidArgument("Invalid branch {}", opcode);
+    }
+}
+
+static std::string Name(const Block& block) {
+    if (block.begin.IsVirtual()) {
+        return fmt::format("\"Virtual {}\"", block.id);
+    } else {
+        return fmt::format("\"{}\"", block.begin);
+    }
+}
+
+void Stack::Push(Token token, Location target) {
+    entries.push_back({
+        .token{token},
+        .target{target},
+    });
+}
+
+std::pair<Location, Stack> Stack::Pop(Token token) const {
+    const std::optional<Location> pc{Peek(token)};
+    if (!pc) {
+        throw LogicError("Token could not be found");
+    }
+    return {*pc, Remove(token)};
+}
+
+std::optional<Location> Stack::Peek(Token token) const {
+    const auto reverse_entries{entries | std::views::reverse};
+    const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)};
+    if (it == reverse_entries.end()) {
+        return std::nullopt;
+    }
+    return it->target;
+}
+
+Stack Stack::Remove(Token token) const {
+    const auto reverse_entries{entries | std::views::reverse};
+    const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)};
+    const auto pos{std::distance(reverse_entries.begin(), it)};
+    Stack result;
+    result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
+    return result;
+}
+
+bool Block::Contains(Location pc) const noexcept {
+    return pc >= begin && pc < end;
+}
+
+Function::Function(Location start_address)
+    : entrypoint{start_address}, labels{Label{
+                                      .address{start_address},
+                                      .block_id{0},
+                                      .stack{},
+                                  }} {}
+
+CFG::CFG(Environment& env_, Location start_address) : env{env_} {
+    functions.emplace_back(start_address);
+    for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
+        while (!functions[function_id].labels.empty()) {
+            Function& function{functions[function_id]};
+            Label label{function.labels.back()};
+            function.labels.pop_back();
+            AnalyzeLabel(function_id, label);
+        }
+    }
+}
+
+void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
+    if (InspectVisitedBlocks(function_id, label)) {
+        // Label address has been visited
+        return;
+    }
+    // Try to find the next block
+    Function* function{&functions[function_id]};
+    Location pc{label.address};
+    const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc,
+                                     [function](Location pc, u32 block_index) {
+                                         return pc < function->blocks_data[block_index].begin;
+                                     })};
+    const auto next_index{std::distance(function->blocks.begin(), next)};
+    const bool is_last{next == function->blocks.end()};
+    Location next_pc;
+    BlockId next_id{UNREACHABLE_BLOCK_ID};
+    if (!is_last) {
+        next_pc = function->blocks_data[*next].begin;
+        next_id = function->blocks_data[*next].id;
+    }
+    // Insert before the next block
+    Block block{
+        .begin{pc},
+        .end{pc},
+        .end_class{EndClass::Branch},
+        .id{label.block_id},
+        .stack{std::move(label.stack)},
+        .cond{true},
+        .branch_true{UNREACHABLE_BLOCK_ID},
+        .branch_false{UNREACHABLE_BLOCK_ID},
+    };
+    // Analyze instructions until it reaches an already visited block or there's a branch
+    bool is_branch{false};
+    while (is_last || pc < next_pc) {
+        is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
+        if (is_branch) {
+            break;
+        }
+        ++pc;
+    }
+    if (!is_branch) {
+        // If the block finished without a branch,
+        // it means that the next instruction is already visited, jump to it
+        block.end = pc;
+        block.cond = true;
+        block.branch_true = next_id;
+        block.branch_false = UNREACHABLE_BLOCK_ID;
+    }
+    // Function's pointer might be invalid, resolve it again
+    function = &functions[function_id];
+    const u32 new_block_index = static_cast<u32>(function->blocks_data.size());
+    function->blocks.insert(function->blocks.begin() + next_index, new_block_index);
+    function->blocks_data.push_back(std::move(block));
+}
+
+bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
+    const Location pc{label.address};
+    Function& function{functions[function_id]};
+    const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) {
+        return function.blocks_data[block_index].Contains(pc);
+    })};
+    if (it == function.blocks.end()) {
+        // Address has not been visited
+        return false;
+    }
+    Block& block{function.blocks_data[*it]};
+    if (block.begin == pc) {
+        throw LogicError("Dangling branch");
+    }
+    const u32 first_index{*it};
+    const u32 second_index{static_cast<u32>(function.blocks_data.size())};
+    const std::array new_indices{first_index, second_index};
+    std::array<Block, 2> split_blocks{Split(std::move(block), pc, label.block_id)};
+    function.blocks_data[*it] = std::move(split_blocks[0]);
+    function.blocks_data.push_back(std::move(split_blocks[1]));
+    function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end());
+    return true;
+}
+
+CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) {
+    const Instruction inst{env.ReadInstruction(pc.Offset())};
+    const Opcode opcode{Decode(inst.raw)};
+    switch (opcode) {
+    case Opcode::BRA:
+    case Opcode::BRX:
+    case Opcode::JMP:
+    case Opcode::JMX:
+    case Opcode::RET:
+        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+            return AnalysisState::Continue;
+        }
+        switch (opcode) {
+        case Opcode::BRA:
+        case Opcode::JMP:
+            AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
+            break;
+        case Opcode::BRX:
+        case Opcode::JMX:
+            AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode));
+            break;
+        case Opcode::RET:
+            block.end_class = EndClass::Return;
+            break;
+        default:
+            break;
+        }
+        block.end = pc;
+        return AnalysisState::Branch;
+    case Opcode::BRK:
+    case Opcode::CONT:
+    case Opcode::LONGJMP:
+    case Opcode::SYNC: {
+        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
+            return AnalysisState::Continue;
+        }
+        const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))};
+        block.branch_true = AddLabel(block, new_stack, stack_pc, function_id);
+        block.end = pc;
+        return AnalysisState::Branch;
+    }
+    case Opcode::PBK:
+    case Opcode::PCNT:
+    case Opcode::PEXIT:
+    case Opcode::PLONGJMP:
+    case Opcode::SSY:
+        block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
+        return AnalysisState::Continue;
+    case Opcode::EXIT:
+        return AnalyzeEXIT(block, function_id, pc, inst);
+    case Opcode::PRET:
+        throw NotImplementedException("PRET flow analysis");
+    case Opcode::CAL:
+    case Opcode::JCAL: {
+        const bool is_absolute{IsAbsoluteJump(opcode)};
+        const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+        // Technically CAL pushes into PRET, but that's implicit in the function call for us
+        // Insert the function into the list if it doesn't exist
+        if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) {
+            functions.push_back(cal_pc);
+        }
+        // Handle CAL like a regular instruction
+        break;
+    }
+    default:
+        break;
+    }
+    const Predicate pred{inst.Pred()};
+    if (pred == Predicate{true} || pred == Predicate{false}) {
+        return AnalysisState::Continue;
+    }
+    const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
+    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
+    return AnalysisState::Branch;
+}
+
+void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc,
+                          EndClass insn_end_class, IR::Condition cond) {
+    if (block.begin != pc) {
+        // If the block doesn't start in the conditional instruction
+        // mark it as a label to visit it later
+        block.end = pc;
+        block.cond = true;
+        block.branch_true = AddLabel(block, block.stack, pc, function_id);
+        block.branch_false = UNREACHABLE_BLOCK_ID;
+        return;
+    }
+    // Impersonate the visited block with a virtual block
+    // Jump from this virtual to the real conditional instruction and the next instruction
+    Function& function{functions[function_id]};
+    const BlockId conditional_block_id{++function.current_block_id};
+    function.blocks.push_back(static_cast<u32>(function.blocks_data.size()));
+    Block& virtual_block{function.blocks_data.emplace_back(Block{
+        .begin{}, // Virtual block
+        .end{},
+        .end_class{EndClass::Branch},
+        .id{block.id}, // Impersonating
+        .stack{block.stack},
+        .cond{cond},
+        .branch_true{conditional_block_id},
+        .branch_false{UNREACHABLE_BLOCK_ID},
+    })};
+    // Set the end properties of the conditional instruction and give it a new identity
+    Block& conditional_block{block};
+    conditional_block.end = pc;
+    conditional_block.end_class = insn_end_class;
+    conditional_block.id = conditional_block_id;
+    // Add a label to the instruction after the conditional instruction
+    const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)};
+    // Branch to the next instruction from the virtual block
+    virtual_block.branch_false = endif_block_id;
+    // And branch to it from the conditional instruction if it is a branch
+    if (insn_end_class == EndClass::Branch) {
+        conditional_block.cond = true;
+        conditional_block.branch_true = endif_block_id;
+        conditional_block.branch_false = UNREACHABLE_BLOCK_ID;
+    }
+}
+
+bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst,
+                        Opcode opcode) {
+    if (inst.branch.is_cbuf) {
+        throw NotImplementedException("Branch with constant buffer offset");
+    }
+    const Predicate pred{inst.Pred()};
+    if (pred == Predicate{false}) {
+        return false;
+    }
+    const bool has_flow_test{HasFlowTest(opcode)};
+    const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
+    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+        block.cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
+        block.branch_false = AddLabel(block, block.stack, pc + 1, function_id);
+    } else {
+        block.cond = true;
+    }
+    return true;
+}
+
+void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst,
+                     bool is_absolute) {
+    const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+    block.branch_true = AddLabel(block, block.stack, bra_pc, function_id);
+}
+
+void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) {
+    throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX");
+}
+
+void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) {
+    const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
+    // Technically CAL pushes into PRET, but that's implicit in the function call for us
+    // Insert the function to the function list if it doesn't exist
+    const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
+    if (it == functions.end()) {
+        functions.emplace_back(cal_pc);
+    }
+}
+
+CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc,
+                                    Instruction inst) {
+    const IR::FlowTest flow_test{inst.branch.flow_test};
+    const Predicate pred{inst.Pred()};
+    if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
+        // EXIT will never be taken
+        return AnalysisState::Continue;
+    }
+    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
+        if (block.stack.Peek(Token::PEXIT).has_value()) {
+            throw NotImplementedException("Conditional EXIT with PEXIT token");
+        }
+        const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
+        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
+        return AnalysisState::Branch;
+    }
+    if (const std::optional<Location> exit_pc{block.stack.Peek(Token::PEXIT)}) {
+        const Stack popped_stack{block.stack.Remove(Token::PEXIT)};
+        block.cond = true;
+        block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
+        block.branch_false = UNREACHABLE_BLOCK_ID;
+        return AnalysisState::Branch;
+    }
+    block.end = pc;
+    block.end_class = EndClass::Exit;
+    return AnalysisState::Branch;
+}
+
+BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) {
+    Function& function{functions[function_id]};
+    if (block.begin == pc) {
+        return block.id;
+    }
+    const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)};
+    if (target != function.blocks_data.end()) {
+        return target->id;
+    }
+    const BlockId block_id{++function.current_block_id};
+    function.labels.push_back(Label{
.address{pc}, + .block_id{block_id}, + .stack{std::move(stack)}, + }); + return block_id; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const u32 block_index : function.blocks) { + const Block& block{function.blocks_data[block_index]}; + const std::string name{Name(block)}; + const auto add_branch = [&](BlockId branch_id, bool add_label) { + const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; + dot += fmt::format("\t\t{}->", name); + if (it == function.blocks_data.end()) { + dot += fmt::format("\"Unknown label {}\"", branch_id); + } else { + dot += Name(*it); + }; + if (add_label && block.cond != true && block.cond != false) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != false) { + add_branch(block.branch_true, true); + } + if (block.cond != true) { + add_branch(block.branch_false, false); + } + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Unreachable: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format( + "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + if (functions.front().blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..b2ab0cdc3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -0,0 +1,137 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell::Flow { + +using BlockId = u32; +using FunctionId = size_t; + +constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; + +enum class EndClass { + Branch, + Exit, + Return, + Unreachable, +}; + +enum class Token { + SSY, + PBK, + PEXIT, + PRET, + PCNT, + PLONGJMP, +}; + +struct StackEntry { + auto operator<=>(const StackEntry&) const noexcept = default; + + Token token; + Location target; +}; + +class Stack { +public: + void Push(Token token, Location target); + [[nodiscard]] std::pair Pop(Token token) const; + [[nodiscard]] std::optional Peek(Token token) const; + [[nodiscard]] Stack Remove(Token token) const; + +private: + boost::container::small_vector entries; +}; + +struct Block { + [[nodiscard]] bool Contains(Location pc) const noexcept; + + Location begin; + Location end; + EndClass end_class; + BlockId id; + Stack stack; + IR::Condition cond; + BlockId branch_true; + BlockId branch_false; +}; + +struct Label { + Location address; + BlockId block_id; + Stack stack; +}; + +struct Function { + Function(Location start_address); + + Location entrypoint; + BlockId current_block_id{0}; + boost::container::small_vector labels; + boost::container::small_vector blocks; + boost::container::small_vector blocks_data; +}; + +class CFG { + enum class AnalysisState { + Branch, + Continue, + }; + +public: + explicit CFG(Environment& env, Location start_address); + + [[nodiscard]] std::string Dot() const; + + [[nodiscard]] std::span Functions() const noexcept { + return std::span(functions.data(), functions.size()); + } + +private: + void AnalyzeLabel(FunctionId function_id, Label& label); + + /// Inspect already visited blocks. 
+ /// Return true when the block has already been visited + [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + + [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + + void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + IR::Condition cond); + + /// Return true when the branch instruction is confirmed to be a branch + [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, + Instruction inst, Opcode opcode); + + void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute); + void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + + /// Return the branch target block id + [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, + FunctionId function_id); + + Environment& env; + boost::container::small_vector functions; + FunctionId current_function_id{0}; +}; + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..ab1cc6c8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { +namespace { +struct MaskValue { + u64 mask; + u64 value; +}; + +constexpr MaskValue MaskValueFromEncoding(const char* encoding) { + u64 mask{}; + u64 value{}; + u64 bit{u64(1) << 63}; + while (*encoding) { + switch (*encoding) { + case '0': + mask |= bit; + break; + case '1': + mask |= bit; + value |= bit; + break; + case '-': + break; + case ' ': + break; + default: + throw LogicError("Invalid encoding character '{}'", *encoding); + } + ++encoding; + if (*encoding != ' ') { + bit >>= 1; + } + } + return MaskValue{.mask{mask}, .value{value}}; +} + +struct InstEncoding { + MaskValue mask_value; + Opcode opcode; +}; +constexpr std::array UNORDERED_ENCODINGS{ +#define INST(name, cute, encode) \ + InstEncoding{ \ + .mask_value{MaskValueFromEncoding(encode)}, \ + .opcode{Opcode::name}, \ + }, +#include "maxwell.inc" +#undef INST +}; + +constexpr auto SortedEncodings() { + std::array encodings{UNORDERED_ENCODINGS}; + std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { + return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); + }); + return encodings; +} +constexpr auto ENCODINGS{SortedEncodings()}; + +constexpr int WidestLeftBits() { + int bits{64}; + for (const InstEncoding& encoding : ENCODINGS) { + bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + } + return 64 - bits; +} +constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; +constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; + +constexpr size_t ToFastLookupIndex(u64 value) { + return static_cast(value >> MASK_SHIFT); +} + +constexpr size_t FastLookupSize() { + size_t max_width{}; + for (const InstEncoding& encoding : ENCODINGS) { + 
+
+constexpr int WidestLeftBits() {
+    int bits{64};
+    for (const InstEncoding& encoding : ENCODINGS) {
+        bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
+    }
+    return 64 - bits;
+}
+constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
+constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
+
+constexpr size_t ToFastLookupIndex(u64 value) {
+    return static_cast<size_t>(value >> MASK_SHIFT);
+}
+
+constexpr size_t FastLookupSize() {
+    size_t max_width{};
+    for (const InstEncoding& encoding : ENCODINGS) {
+        max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
+    }
+    return max_width + 1;
+}
+constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
+
+struct InstInfo {
+    [[nodiscard]] u64 Mask() const noexcept {
+        return static_cast<u64>(high_mask) << MASK_SHIFT;
+    }
+
+    [[nodiscard]] u64 Value() const noexcept {
+        return static_cast<u64>(high_value) << MASK_SHIFT;
+    }
+
+    u16 high_mask;
+    u16 high_value;
+    Opcode opcode;
+};
+
+constexpr auto MakeFastLookupTableIndex(size_t index) {
+    // Note: the bucket capacity of 8 is an assumption; std::array::at traps
+    // any overflow while the table is being built.
+    std::array<InstInfo, 8> encodings{};
+    size_t element{};
+    for (const auto& encoding : ENCODINGS) {
+        const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
+        const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
+        if ((index & mask) == value) {
+            encodings.at(element) = InstInfo{
+                .high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)},
+                .high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)},
+                .opcode{encoding.opcode},
+            };
+            ++element;
+        }
+    }
+    return encodings;
+}
+
+/*constexpr*/ auto MakeFastLookupTable() {
+    auto encodings{std::make_unique<std::array<std::array<InstInfo, 8>, FAST_LOOKUP_SIZE>>()};
+    for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
+        (*encodings)[index] = MakeFastLookupTableIndex(index);
+    }
+    return encodings;
+}
+const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
+} // Anonymous namespace
+
+Opcode Decode(u64 insn) {
+    const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
+    const auto it{std::ranges::find_if(
+        table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
+    if (it == table.end()) {
+        throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
+    }
+    return it->opcode;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..2a3dd28e8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] Opcode Decode(u64 insn);
+
+} // namespace Shader::Maxwell
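How matching works end to end: MaskValueFromEncoding turns each pattern string into a (mask, value) pair, and Decode tests (insn & mask) == value against the presorted bucket. The same rule, shrunk to a standalone four-bit toy (names here are illustrative, not from the patch):

    #include <cstdint>

    // "110-": three fixed bits and one don't-care.
    constexpr bool Matches(std::uint8_t insn) {
        constexpr std::uint8_t mask{0b1110};  // positions of the fixed bits
        constexpr std::uint8_t value{0b1100}; // required values at those positions
        return (insn & mask) == value;
    }
    static_assert(Matches(0b1100) && Matches(0b1101) && !Matches(0b1110));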
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..57fd531f2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,62 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/flow_test.h"
+
+namespace Shader::Maxwell {
+
+struct Predicate {
+    Predicate() = default;
+    Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
+    Predicate(bool value) : index{7}, negated{!value} {}
+    Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
+
+    unsigned index;
+    bool negated;
+};
+
+inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
+    return lhs.index == rhs.index && lhs.negated == rhs.negated;
+}
+
+inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
+    return !(lhs == rhs);
+}
+
+union Instruction {
+    Instruction(u64 raw_) : raw{raw_} {}
+
+    u64 raw;
+
+    union {
+        BitField<5, 1, u64> is_cbuf;
+        BitField<0, 5, IR::FlowTest> flow_test;
+
+        [[nodiscard]] u32 Absolute() const noexcept {
+            return static_cast<u32>(absolute);
+        }
+
+        [[nodiscard]] s32 Offset() const noexcept {
+            return static_cast<s32>(offset);
+        }
+
+    private:
+        BitField<20, 24, s64> offset;
+        BitField<20, 32, u64> absolute;
+    } branch;
+
+    [[nodiscard]] Predicate Pred() const noexcept {
+        return Predicate{pred};
+    }
+
+private:
+    BitField<16, 4, u64> pred;
+};
+static_assert(std::is_trivially_copyable_v<Instruction>);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..66b51a19e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,106 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <compare>
+#include <limits>
+
+#include <fmt/format.h>
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+
+namespace Shader::Maxwell {
+
+class Location {
+    static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits<u32>::max()};
+
+public:
+    constexpr Location() = default;
+
+    constexpr Location(u32 initial_offset) : offset{initial_offset} {
+        if (initial_offset % 8 != 0) {
+            throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
+        }
+        Align();
+    }
+
+    [[nodiscard]] constexpr u32 Offset() const noexcept {
+        return offset;
+    }
+
+    [[nodiscard]] constexpr bool IsVirtual() const {
+        return offset == VIRTUAL_OFFSET;
+    }
+
+    constexpr auto operator<=>(const Location&) const noexcept = default;
+
+    constexpr Location operator++() noexcept {
+        Step();
+        return *this;
+    }
+
+    constexpr Location operator++(int) noexcept {
+        const Location copy{*this};
+        Step();
+        return copy;
+    }
+
+    constexpr Location operator--() noexcept {
+        Back();
+        return *this;
+    }
+
+    constexpr Location operator--(int) noexcept {
+        const Location copy{*this};
+        Back();
+        return copy;
+    }
+
+    constexpr Location operator+(int number) const {
+        Location new_pc{*this};
+        while (number > 0) {
+            --number;
+            ++new_pc;
+        }
+        while (number < 0) {
+            ++number;
+            --new_pc;
+        }
+        return new_pc;
+    }
+
+    constexpr Location operator-(int number) const {
+        return operator+(-number);
+    }
+
+private:
+    constexpr void Align() {
+        offset += offset % 32 == 0 ? 8 : 0;
+    }
+
+    constexpr void Step() {
+        offset += 8 + (offset % 32 == 24 ? 8 : 0);
+    }
+
+    constexpr void Back() {
+        offset -= 8 + (offset % 32 == 8 ? 8 : 0);
+    }
+
+    u32 offset{VIRTUAL_OFFSET};
+};
+
+} // namespace Shader::Maxwell
+
+template <>
+struct fmt::formatter<Shader::Maxwell::Location> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
+        return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
+    }
+};
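Maxwell packs one 8-byte scheduling word ahead of every three 8-byte instructions, so the valid code offsets inside a 32-byte bundle are 8, 16 and 24 — the invariant Align and Step maintain. A quick walk, assuming that layout:

    Shader::Maxwell::Location pc{0x08};
    ++pc; // 0x10
    ++pc; // 0x18
    ++pc; // 0x28: Step() hops over the scheduling word at 0x20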
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..1515285bf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,285 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+INST(AL2P, "AL2P", "1110 1111 1010 0---")
+INST(ALD, "ALD", "1110 1111 1101 1---")
+INST(AST, "AST", "1110 1111 1111 0---")
+INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
+INST(ATOM, "ATOM", "1110 1101 ---- ----")
+INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
+INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
+INST(B2R, "B2R", "1111 0000 1011 1---")
+INST(BAR, "BAR", "1111 0000 1010 1---")
+INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
+INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
+INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
+INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
+INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
+INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
+INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
+INST(BPT, "BPT", "1110 0011 1010 ----")
+INST(BRA, "BRA", "1110 0010 0100 ----")
+INST(BRK, "BRK", "1110 0011 0100 ----")
+INST(BRX, "BRX", "1110 0010 0101 ----")
+INST(CAL, "CAL", "1110 0010 0110 ----")
+INST(CCTL, "CCTL", "1110 1111 011- ----")
+INST(CCTLL, "CCTLL", "1110 1111 100- ----")
+INST(CONT, "CONT", "1110 0011 0101 ----")
+INST(CS2R, "CS2R", "0101 0000 1100 1---")
+INST(CSET, "CSET", "0101 0000 1001 1---")
+INST(CSETP, "CSETP", "0101 0000 1010 0---")
+INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
+INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
+INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
+INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
+INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
+INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
+INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
+INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
+INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
+INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
+INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
+INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
+INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
+INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
+INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
+INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
+INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
+INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
+INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
+INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
+INST(EXIT, "EXIT", "1110 0011 0000 ----")
+INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
+INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
+INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
+INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
+INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
+INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
+INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
+INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
+INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
+INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
+INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
+INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
+INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
+INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
+INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
+INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
+INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
+INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
+INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
+INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
+INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
+INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
+INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
+INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
+INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
+INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
+INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
+INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
+INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
+INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
+INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
+INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
+INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
+INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
+INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
+INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
+INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
+INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
+INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
+INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
+INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
+INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
+INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
+INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
+INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
+INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
+INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
+INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
+INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
+INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
+INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
+INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
+INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
+INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
+INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
+INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----")
+INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----")
+INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
+INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
+INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
+INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
+INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
+INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
+INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
+INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
+INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
+INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
+INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
+INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
+INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
+INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
+INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
+INST(IADD32I, "IADD32I", "0001 110- ---- ----")
+INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
+INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
+INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") +INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") +INST(IDE, "IDE", "1110 0011 1001 ----") +INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") +INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") +INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") +INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") +INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") +INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") +INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") +INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") +INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") +INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") +INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") +INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") +INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") +INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") +INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") +INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") +INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") +INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") +INST(IPA, "IPA", "1110 0000 ---- ----") +INST(ISBERD, "ISBERD", "1110 1111 1101 0---") +INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") +INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") +INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") +INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") +INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") +INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") +INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") +INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") +INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") +INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") +INST(JCAL, "JCAL", "1110 0010 0010 ----") +INST(JMP, "JMP", "1110 0010 0001 ----") +INST(JMX, "JMX", "1110 0010 0000 ----") +INST(KIL, "KIL", "1110 0011 0011 ----") +INST(LD, "LD", "100- ---- ---- ----") +INST(LDC, "LDC", "1110 1111 1001 0---") +INST(LDG, "LDG", "1110 1110 1101 0---") +INST(LDL, "LDL", "1110 1111 0100 0---") +INST(LDS, "LDS", "1110 1111 0100 1---") +INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") +INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") +INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") +INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") +INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") +INST(LEPC, "LEPC", "0101 0000 1101 0---") +INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") +INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") +INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") +INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") +INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP32I, "LOP32I", "0000 01-- ---- ----") +INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") +INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") +INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") +INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") +INST(MOV32I, "MOV32I", "0000 0001 0000 ----") +INST(MUFU, "MUFU", "0101 0000 1000 0---") +INST(NOP, "NOP", "0101 0000 1011 0---") +INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") +INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") +INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") +INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") +INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") +INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") +INST(PBK, "PBK", "1110 0010 1010 ----") 
+INST(PCNT, "PCNT", "1110 0010 1011 ----") +INST(PEXIT, "PEXIT", "1110 0010 0011 ----") +INST(PIXLD, "PIXLD", "1110 1111 1110 1---") +INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") +INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") +INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") +INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") +INST(PRET, "PRET", "1110 0010 0111 ----") +INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") +INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") +INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") +INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") +INST(PSET, "PSET", "0101 0000 1000 1---") +INST(PSETP, "PSETP", "0101 0000 1001 0---") +INST(R2B, "R2B", "1111 0000 1100 0---") +INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") +INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") +INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") +INST(RAM, "RAM", "1110 0011 1000 ----") +INST(RED, "RED", "1110 1011 1111 1---") +INST(RET, "RET", "1110 0011 0010 ----") +INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") +INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") +INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") +INST(RTT, "RTT", "1110 0011 0110 ----") +INST(S2R, "S2R", "1111 0000 1100 1---") +INST(SAM, "SAM", "1110 0011 0111 ----") +INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") +INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") +INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") +INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") +INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") +INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") +INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") +INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") +INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") +INST(SHFL, "SHFL", "1110 1111 0001 0---") +INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") +INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") +INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") +INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") +INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") +INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") +INST(SSY, "SSY", "1110 0010 1001 ----") +INST(ST, "ST", "101- ---- ---- ----") +INST(STG, "STG", "1110 1110 1101 1---") +INST(STL, "STL", "1110 1111 0101 0---") +INST(STP, "STP", "1110 1110 1010 0---") +INST(STS, "STS", "1110 1111 0101 1---") +INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SULD, "SULD", "1110 1011 000- ----") +INST(SURED, "SURED", "1110 1011 010- ----") +INST(SUST, "SUST", "1110 1011 001- ----") +INST(SYNC, "SYNC", "1111 0000 1111 1---") +INST(TEX, "TEX", "1100 00-- --11 1---") +INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEXS, "TEXS", "1101 -00- ---- ----") +INST(TLD, "TLD", "1101 1100 --11 1---") +INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD4, "TLD4", "1100 10-- --11 1---") +INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") +INST(TLDS, "TLDS", "1101 -01- ---- ----") +INST(TMML, "TMML", "1101 1111 0101 1---") +INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") +INST(TXA, "TXA", "1101 1111 0100 0---") +INST(TXD, "TXD", "1101 1110 0011 10--") +INST(TXD_b, "TXD (b)", "1101 1110 0111 10--") +INST(TXQ, "TXQ", "1101 1111 0100 1---") +INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") +INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") +INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") +INST(VADD, "VADD", "0010 00-- ---- ----") +INST(VMAD, "VMAD", "0101 1111 ---- ----") +INST(VMNMX, "VMNMX", "0011 101- ---- ----") +INST(VOTE, "VOTE", 
"0101 0000 1101 1---") +INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") +INST(VSET, "VSET", "0100 000- ---- ----") +INST(VSETP, "VSETP", "0101 0000 1111 0---") +INST(VSHL, "VSHL", "0101 0111 ---- ----") +INST(VSHR, "VSHR", "0101 0110 ---- ----") +INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") +INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") +INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") +INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") + +// Removed due to its weird formatting making fast tables larger +// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp new file mode 100644 index 000000000..8a7bdb611 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcode.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) #cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); + } + return NAME_TABLE[static_cast(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcode.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp new file mode 100644 index 000000000..67a98ba57 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
new file mode 100644
index 000000000..67a98ba57
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/frontend/maxwell/termination_code.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+Program::Function::~Function() {
+    std::ranges::for_each(blocks, &std::destroy_at<IR::Block>);
+}
+
+Program::Program(Environment& env, const Flow::CFG& cfg) {
+    std::vector<IR::Block*> block_map;
+    functions.reserve(cfg.Functions().size());
+
+    for (const Flow::Function& cfg_function : cfg.Functions()) {
+        Function& function{functions.emplace_back()};
+
+        const size_t num_blocks{cfg_function.blocks.size()};
+        IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)};
+        function.blocks.reserve(num_blocks);
+
+        block_map.resize(cfg_function.blocks_data.size());
+
+        // Visit the instructions of all blocks
+        for (const Flow::BlockId block_id : cfg_function.blocks) {
+            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+
+            IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))};
+            ++block_memory;
+            function.blocks.push_back(block);
+            block_map[flow_block.id] = block;
+        }
+        // Now that all blocks are defined, emit the termination instructions
+        for (const Flow::BlockId block_id : cfg_function.blocks) {
+            const Flow::Block& flow_block{cfg_function.blocks_data[block_id]};
+            EmitTerminationCode(flow_block, block_map);
+        }
+    }
+}
+
+std::string DumpProgram(const Program& program) {
+    size_t index{0};
+    std::map<const IR::Inst*, size_t> inst_to_index;
+    std::map<const IR::Block*, size_t> block_to_index;
+
+    for (const Program::Function& function : program.functions) {
+        for (const IR::Block* const block : function.blocks) {
+            block_to_index.emplace(block, index);
+            ++index;
+        }
+    }
+    std::string ret;
+    for (const Program::Function& function : program.functions) {
+        ret += fmt::format("Function\n");
+        for (const IR::Block* const block : function.blocks) {
+            ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+        }
+    }
+    return ret;
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h
new file mode 100644
index 000000000..7814b2c01
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/program.h
@@ -0,0 +1,39 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <boost/pool/pool_alloc.hpp>
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace Shader::Maxwell {
+
+class Program {
+    friend std::string DumpProgram(const Program& program);
+
+public:
+    explicit Program(Environment& env, const Flow::CFG& cfg);
+
+private:
+    struct Function {
+        ~Function();
+
+        std::vector<IR::Block*> blocks;
+    };
+
+    boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete,
+                          boost::details::pool::null_mutex>
+        block_alloc_pool;
+    std::vector<Function> functions;
+};
+
+[[nodiscard]] std::string DumpProgram(const Program& program);
+
+} // namespace Shader::Maxwell
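Why the second loop in Program::Program exists: a Branch terminator dereferences the IR::Block* of both successors, and those pointers only exist once every block of the function has been constructed. A standalone miniature of the same two-pass shape (illustrative types, not from the patch):

    #include <cassert>
    #include <vector>

    struct MiniBlock {
        MiniBlock* branch_true{};
    };

    int main() {
        std::vector<MiniBlock> blocks(2);                         // pass 1: construct every block
        std::vector<MiniBlock*> block_map{&blocks[0], &blocks[1]};
        blocks[0].branch_true = block_map[1];                     // pass 2: resolve successors
        assert(blocks[0].branch_true == &blocks[1]);
    }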
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
new file mode 100644
index 000000000..a4ea5c5e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <span>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/termination_code.h"
+
+namespace Shader::Maxwell {
+
+static void EmitExit(IR::IREmitter& ir) {
+    ir.Exit();
+}
+
+static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) {
+    switch (flow_test) {
+    case IR::FlowTest::T:
+        return ir.Imm1(true);
+    case IR::FlowTest::F:
+        return ir.Imm1(false);
+    case IR::FlowTest::NE:
+        // FIXME: Verify this
+        return ir.LogicalNot(ir.GetZFlag());
+    case IR::FlowTest::NaN:
+        // FIXME: Verify this
+        return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
+    default:
+        throw NotImplementedException("Flow test {}", flow_test);
+    }
+}
+
+static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) {
+    const IR::FlowTest flow_test{cond.FlowTest()};
+    const auto [pred, pred_negated]{cond.Pred()};
+    if (pred == IR::Pred::PT && !pred_negated) {
+        return GetFlowTest(flow_test, ir);
+    }
+    if (flow_test == IR::FlowTest::T) {
+        return ir.GetPred(pred, pred_negated);
+    }
+    return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir));
+}
+
+static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map,
+                       IR::IREmitter& ir) {
+    if (flow_block.cond == true) {
+        return ir.Branch(block_map[flow_block.branch_true]);
+    }
+    if (flow_block.cond == false) {
+        return ir.Branch(block_map[flow_block.branch_false]);
+    }
+    return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true],
+                                block_map[flow_block.branch_false]);
+}
+
+void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map) {
+    IR::Block* const block{block_map[flow_block.id]};
+    IR::IREmitter ir(*block);
+    switch (flow_block.end_class) {
+    case Flow::EndClass::Branch:
+        EmitBranch(flow_block, block_map, ir);
+        break;
+    case Flow::EndClass::Exit:
+        EmitExit(ir);
+        break;
+    case Flow::EndClass::Return:
+        ir.Return();
+        break;
+    case Flow::EndClass::Unreachable:
+        ir.Unreachable();
+        break;
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h
new file mode 100644
index 000000000..b0d667942
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/termination_code.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <span>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace Shader::Maxwell {
+
+void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map);
+
+} // namespace Shader::Maxwell
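The two FIXMEs above mark flow-test mappings that have not been verified against hardware. As a readable reference for what is currently emitted, here is a standalone boolean model (assumption: it mirrors GetFlowTest and the logical-AND combination in GetCond):

    // NE passes while the zero flag is clear; NaN requires sign and zero both set.
    constexpr bool FlowNE(bool zflag) { return !zflag; }
    constexpr bool FlowNaN(bool sflag, bool zflag) { return sflag && zflag; }
    constexpr bool Cond(bool pred, bool flow) { return pred && flow; }
    static_assert(Cond(true, FlowNE(false)) && !Cond(false, FlowNE(false)));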
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
new file mode 100644
index 000000000..e98bbd0d1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp
@@ -0,0 +1,15 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+void TranslatorVisitor::EXIT(u64) {
+    ir.Exit();
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..c4288d9a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,133 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class DestFormat : u64 {
+    Invalid,
+    I16,
+    I32,
+    I64,
+};
+enum class SrcFormat : u64 {
+    Invalid,
+    F16,
+    F32,
+    F64,
+};
+enum class Rounding : u64 {
+    Round,
+    Floor,
+    Ceil,
+    Trunc,
+};
+
+union F2I {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 2, DestFormat> dest_format;
+    BitField<10, 2, SrcFormat> src_format;
+    BitField<12, 1, u64> is_signed;
+    BitField<39, 2, Rounding> rounding;
+    BitField<41, 1, u64> half;
+    BitField<44, 1, u64> ftz;
+    BitField<45, 1, u64> abs;
+    BitField<47, 1, u64> cc;
+    BitField<49, 1, u64> neg;
+};
+
+size_t BitSize(DestFormat dest_format) {
+    switch (dest_format) {
+    case DestFormat::I16:
+        return 16;
+    case DestFormat::I32:
+        return 32;
+    case DestFormat::I64:
+        return 64;
+    default:
+        throw NotImplementedException("Invalid destination format {}", dest_format);
+    }
+}
+
+void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) {
+    // F2I is used to convert from a floating point value to an integer
+    const F2I f2i{insn};
+
+    const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)};
+    const IR::U16U32U64 rounded_value{[&] {
+        switch (f2i.rounding) {
+        case Rounding::Round:
+            return v.ir.FPRoundEven(float_value);
+        case Rounding::Floor:
+            return v.ir.FPFloor(float_value);
+        case Rounding::Ceil:
+            return v.ir.FPCeil(float_value);
+        case Rounding::Trunc:
+            return v.ir.FPTrunc(float_value);
+        default:
+            throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
+        }
+    }()};
+
+    // TODO: Handle out of bounds conversions.
+    // For example converting F32 65537.0 to U16, the expected value is 0xffff.
+
+    const bool is_signed{f2i.is_signed != 0};
+    const size_t bitsize{BitSize(f2i.dest_format)};
+    const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
+
+    v.X(f2i.dest_reg, result);
+
+    if (f2i.cc != 0) {
+        v.SetZFlag(v.ir.GetZeroFromOp(result));
+        if (is_signed) {
+            v.SetSFlag(v.ir.GetSignFromOp(result));
+        } else {
+            v.ResetSFlag();
+        }
+        v.ResetCFlag();
+
+        // TODO: Investigate if out of bounds conversions set the overflow flag
+        v.ResetOFlag();
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::F2I_reg(u64 insn) {
+    union {
+        F2I base;
+        BitField<20, 8, IR::Reg> src_reg;
+    } const f2i{insn};
+
+    const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 {
+        switch (f2i.base.src_format) {
+        case SrcFormat::F16:
+            return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half);
+        case SrcFormat::F32:
+            return X(f2i.src_reg);
+        case SrcFormat::F64:
+            return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
+        default:
+            throw NotImplementedException("Invalid F2I source format {}",
+                                          f2i.base.src_format.Value());
+        }
+    }()};
+
+    TranslateF2I(*this, insn, op_a);
+}
+
+void TranslatorVisitor::F2I_cbuf(u64) {
+    throw NotImplementedException("{}", Opcode::F2I_cbuf);
+}
+
+void TranslatorVisitor::F2I_imm(u64) {
+    throw NotImplementedException("{}", Opcode::F2I_imm);
+}
+
+} // namespace Shader::Maxwell
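The rounding dispatch in TranslateF2I has a direct host-side analogue that is handy for sanity-checking expected results. A minimal standalone model (the local enum mirrors the one in the file above; std::nearbyint matches Round only under the default round-to-nearest-even FP environment):

    #include <cmath>

    enum class Rounding { Round, Floor, Ceil, Trunc };

    double ApplyRounding(double v, Rounding r) {
        switch (r) {
        case Rounding::Round: return std::nearbyint(v); // nearest, ties to even
        case Rounding::Floor: return std::floor(v);
        case Rounding::Ceil:  return std::ceil(v);
        case Rounding::Trunc: return std::trunc(v);
        }
        return v;
    }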
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..e2ab0dab2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Operation {
+    Cos = 0,
+    Sin = 1,
+    Ex2 = 2,    // Base 2 exponent
+    Lg2 = 3,    // Base 2 logarithm
+    Rcp = 4,    // Reciprocal
+    Rsq = 5,    // Reciprocal square root
+    Rcp64H = 6, // 64-bit reciprocal
+    Rsq64H = 7, // 64-bit reciprocal square root
+    Sqrt = 8,
+};
+} // Anonymous namespace
+
+void TranslatorVisitor::MUFU(u64 insn) {
+    // MUFU implements the special functions: trigonometry, exponentials,
+    // logarithms, reciprocals and square roots. See Operation.
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<20, 4, Operation> operation;
+        BitField<46, 1, u64> abs;
+        BitField<48, 1, u64> neg;
+        BitField<50, 1, u64> sat;
+    } const mufu{insn};
+
+    const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
+    IR::U32 value{[&]() -> IR::U32 {
+        switch (mufu.operation) {
+        case Operation::Cos:
+            return ir.FPCosNotReduced(op_a);
+        case Operation::Sin:
+            return ir.FPSinNotReduced(op_a);
+        case Operation::Ex2:
+            return ir.FPExp2NotReduced(op_a);
+        case Operation::Lg2:
+            return ir.FPLog2(op_a);
+        case Operation::Rcp:
+            return ir.FPRecip(op_a);
+        case Operation::Rsq:
+            return ir.FPRecipSqrt(op_a);
+        case Operation::Rcp64H:
+            throw NotImplementedException("MUFU.RCP64H");
+        case Operation::Rsq64H:
+            throw NotImplementedException("MUFU.RSQ64H");
+        case Operation::Sqrt:
+            return ir.FPSqrt(op_a);
+        default:
+            throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
+        }
+    }()};
+
+    if (mufu.sat) {
+        value = ir.FPSaturate(value);
+    }
+
+    X(mufu.dest_reg, value);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..7bc7ce9f2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,79 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+IR::U32 TranslatorVisitor::X(IR::Reg reg) {
+    return ir.GetReg(reg);
+}
+
+void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
+    ir.SetReg(dest_reg, value);
+}
+
+IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 14, s64> offset;
+        BitField<34, 5, u64> binding;
+    } const cbuf{insn};
+    if (cbuf.binding >= 18) {
+        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
+    }
+    if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) {
+        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
+    }
+    const IR::U32 binding{ir.Imm32(static_cast<u32>(cbuf.binding))};
+    const IR::U32 byte_offset{ir.Imm32(static_cast<u32>(cbuf.offset) * 4)};
+    return ir.GetCbuf(binding, byte_offset);
+}
+
+IR::U32 TranslatorVisitor::GetImm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<20, 19, u64> value;
+        BitField<56, 1, u64> is_negative;
+    } const imm{insn};
+    const s32 positive_value{static_cast<s32>(imm.value)};
+    const s32 value{imm.is_negative != 0 ? -positive_value : positive_value};
+    return ir.Imm32(value);
+}
+
+void TranslatorVisitor::SetZFlag(const IR::U1& value) {
+    ir.SetZFlag(value);
+}
+
+void TranslatorVisitor::SetSFlag(const IR::U1& value) {
+    ir.SetSFlag(value);
+}
+
+void TranslatorVisitor::SetCFlag(const IR::U1& value) {
+    ir.SetCFlag(value);
+}
+
+void TranslatorVisitor::SetOFlag(const IR::U1& value) {
+    ir.SetOFlag(value);
+}
+
+void TranslatorVisitor::ResetZero() {
+    SetZFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetSFlag() {
+    SetSFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetCFlag() {
+    SetCFlag(ir.Imm1(false));
+}
+
+void TranslatorVisitor::ResetOFlag() {
+    SetOFlag(ir.Imm1(false));
+}
+
+} // namespace Shader::Maxwell
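GetImm decodes a sign-magnitude immediate, not a two's-complement one: a 19-bit magnitude at bit 20 plus a separate negate bit at 56. A standalone model with the same arithmetic:

    #include <cstdint>

    constexpr std::int32_t DecodeImm(std::uint64_t insn) {
        const auto magnitude{static_cast<std::int32_t>((insn >> 20) & 0x7'FFFF)};
        return ((insn >> 56) & 1) != 0 ? -magnitude : magnitude;
    }
    static_assert(DecodeImm(std::uint64_t{5} << 20) == 5);
    static_assert(DecodeImm((std::uint64_t{5} << 20) | (std::uint64_t{1} << 56)) == -5);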
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..bc607b002
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,316 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/instruction.h"
+
+namespace Shader::Maxwell {
+
+class TranslatorVisitor {
+public:
+    explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
+
+    Environment& env;
+    IR::IREmitter ir;
+
+    void AL2P(u64 insn);
+    void ALD(u64 insn);
+    void AST(u64 insn);
+    void ATOM_cas(u64 insn);
+    void ATOM(u64 insn);
+    void ATOMS_cas(u64 insn);
+    void ATOMS(u64 insn);
+    void B2R(u64 insn);
+    void BAR(u64 insn);
+    void BFE_reg(u64 insn);
+    void BFE_cbuf(u64 insn);
+    void BFE_imm(u64 insn);
+    void BFI_reg(u64 insn);
+    void BFI_rc(u64 insn);
+    void BFI_cr(u64 insn);
+    void BFI_imm(u64 insn);
+    void BPT(u64 insn);
+    void BRA(u64 insn);
+    void BRK(u64 insn);
+    void BRX(u64 insn);
+    void CAL(u64 insn);
+    void CCTL(u64 insn);
+    void CCTLL(u64 insn);
+    void CONT(u64 insn);
+    void CS2R(u64 insn);
+    void CSET(u64 insn);
+    void CSETP(u64 insn);
+    void DADD_reg(u64 insn);
+    void DADD_cbuf(u64 insn);
+    void DADD_imm(u64 insn);
+    void DEPBAR(u64 insn);
+    void DFMA_reg(u64 insn);
+    void DFMA_rc(u64 insn);
+    void DFMA_cr(u64 insn);
+    void DFMA_imm(u64 insn);
+    void DMNMX_reg(u64 insn);
+    void DMNMX_cbuf(u64 insn);
+    void DMNMX_imm(u64 insn);
+    void DMUL_reg(u64 insn);
+    void DMUL_cbuf(u64 insn);
+    void DMUL_imm(u64 insn);
+    void DSET_reg(u64 insn);
+    void DSET_cbuf(u64 insn);
+    void DSET_imm(u64 insn);
+    void DSETP_reg(u64 insn);
+    void DSETP_cbuf(u64 insn);
+    void DSETP_imm(u64 insn);
+    void EXIT(u64 insn);
+    void F2F_reg(u64 insn);
+    void F2F_cbuf(u64 insn);
+    void F2F_imm(u64 insn);
+    void F2I_reg(u64 insn);
+    void F2I_cbuf(u64 insn);
+    void F2I_imm(u64 insn);
+    void FADD_reg(u64 insn);
+    void FADD_cbuf(u64 insn);
+    void FADD_imm(u64 insn);
+    void FADD32I(u64 insn);
+    void FCHK_reg(u64 insn);
+    void FCHK_cbuf(u64 insn);
+    void FCHK_imm(u64 insn);
+    void FCMP_reg(u64 insn);
+    void FCMP_rc(u64 insn);
+    void FCMP_cr(u64 insn);
+    void FCMP_imm(u64 insn);
+    void FFMA_reg(u64 insn);
+    void FFMA_rc(u64 insn);
+    void FFMA_cr(u64 insn);
+    void FFMA_imm(u64 insn);
+    void FFMA32I(u64 insn);
+    void FLO_reg(u64 insn);
+    void FLO_cbuf(u64 insn);
+    void FLO_imm(u64 insn);
+    void FMNMX_reg(u64 insn);
+    void FMNMX_cbuf(u64 insn);
+    void FMNMX_imm(u64 insn);
+    void FMUL_reg(u64 insn);
+ void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(u64 insn); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(u64 insn); + void PCNT(u64 insn); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 
insn); + void RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(u64 insn); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..23512db1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class InterpolationMode : u64 { + Pass = 0, + Multiply = 1, + Constant = 2, + Sc = 3, +}; + +enum class SampleMode : u64 { + Default = 0, + Centroid = 1, + Offset = 2, +}; +} // Anonymous namespace + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. 
+ union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. + // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + if (is_indexed) { + throw NotImplementedException("IPA.IDX"); + } + + const IR::Attribute attribute{ipa.attribute}; + IR::U32 value{ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; + const bool is_perspective{false}; + if (is_perspective) { + const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + value = ir.FPMul(value, rcp_position_w); + } + } + + switch (ipa.interpolation_mode) { + case InterpolationMode::Pass: + break; + case InterpolationMode::Multiply: + value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + break; + case InterpolationMode::Constant: + throw NotImplementedException("IPA.CONSTANT"); + case InterpolationMode::Sc: + throw NotImplementedException("IPA.SC"); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + ir.SetReg(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..d8fd387cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,90 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class StoreSize : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; +} // Anonymous namespace + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. 
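+    // The .E modifier (bit 45 below) selects extended 64-bit addressing; without
+    // it the address register holds a 32-bit pointer that gets zero-extended.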
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> data_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<45, 1, u64> e;
+        BitField<46, 2, StoreCache> cache;
+        BitField<48, 3, StoreSize> size;
+    } const stg{insn};
+
+    const IR::U64 address{[&]() -> IR::U64 {
+        if (stg.e == 0) {
+            // STG without .E uses a 32-bit pointer, zero-extend it
+            return ir.ConvertU(64, X(stg.addr_reg));
+        }
+        if (!IR::IsAligned(stg.addr_reg, 2)) {
+            throw NotImplementedException("Unaligned address register");
+        }
+        // Pack two registers to build the 64-bit address
+        return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1)));
+    }()};
+
+    switch (stg.size) {
+    case StoreSize::U8:
+        ir.WriteGlobalU8(address, X(stg.data_reg));
+        break;
+    case StoreSize::S8:
+        ir.WriteGlobalS8(address, X(stg.data_reg));
+        break;
+    case StoreSize::U16:
+        ir.WriteGlobalU16(address, X(stg.data_reg));
+        break;
+    case StoreSize::S16:
+        ir.WriteGlobalS16(address, X(stg.data_reg));
+        break;
+    case StoreSize::B32:
+        ir.WriteGlobal32(address, X(stg.data_reg));
+        break;
+    case StoreSize::B64: {
+        if (!IR::IsAligned(stg.data_reg, 2)) {
+            throw NotImplementedException("Unaligned data registers");
+        }
+        const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))};
+        ir.WriteGlobal64(address, vector);
+        break;
+    }
+    case StoreSize::B128: {
+        if (!IR::IsAligned(stg.data_reg, 4)) {
+            throw NotImplementedException("Unaligned data registers");
+        }
+        const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1),
+                                                     X(stg.data_reg + 2), X(stg.data_reg + 3))};
+        ir.WriteGlobal128(address, vector);
+        break;
+    }
+    }
+}
+
+} // namespace Shader::Maxwell
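The address packing used by STG.E above has a simple scalar equivalent: two consecutive 32-bit registers form one 64-bit pointer, low word first (assumption matching PackUint2x32's lane order):

    #include <cstdint>

    constexpr std::uint64_t PackAddress(std::uint32_t lo, std::uint32_t hi) {
        return (std::uint64_t{hi} << 32) | lo;
    }
    static_assert(PackAddress(0xBEEF'0000u, 0x1u) == 0x1'BEEF'0000u);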
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..c907c1ffb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,1105 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Maxwell {
+
+[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) {
+    // The pre-optimization dump is kept only for inspection in a debugger
+    [[maybe_unused]] const auto raw{IR::DumpBlock(block)};
+
+    Optimization::GetSetElimination(block);
+    Optimization::DeadCodeEliminationPass(block);
+    Optimization::IdentityRemovalPass(block);
+    const auto dumped{IR::DumpBlock(block)};
+
+    fmt::print(stderr, "{}", dumped);
+}
+
+[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
+    throw NotImplementedException("Instruction {} is not implemented", opcode);
+}
+
+void TranslatorVisitor::AL2P(u64) {
+    ThrowNotImplemented(Opcode::AL2P);
+}
+
+void TranslatorVisitor::ALD(u64) {
+    ThrowNotImplemented(Opcode::ALD);
+}
+
+void TranslatorVisitor::AST(u64) {
+    ThrowNotImplemented(Opcode::AST);
+}
+
+void TranslatorVisitor::ATOM_cas(u64) {
+    ThrowNotImplemented(Opcode::ATOM_cas);
+}
+
+void TranslatorVisitor::ATOM(u64) {
+    ThrowNotImplemented(Opcode::ATOM);
+}
+
+void TranslatorVisitor::ATOMS_cas(u64) {
+    ThrowNotImplemented(Opcode::ATOMS_cas);
+}
+
+void TranslatorVisitor::ATOMS(u64) {
+    ThrowNotImplemented(Opcode::ATOMS);
+}
+
+void TranslatorVisitor::B2R(u64) {
+    ThrowNotImplemented(Opcode::B2R);
+}
+
+void TranslatorVisitor::BAR(u64) {
+    ThrowNotImplemented(Opcode::BAR);
+}
+
+void TranslatorVisitor::BFE_reg(u64) {
+    ThrowNotImplemented(Opcode::BFE_reg);
+}
+
+void TranslatorVisitor::BFE_cbuf(u64) {
+    ThrowNotImplemented(Opcode::BFE_cbuf);
+}
+
+void TranslatorVisitor::BFE_imm(u64) {
+    ThrowNotImplemented(Opcode::BFE_imm);
+}
+
+void TranslatorVisitor::BFI_reg(u64) {
+    ThrowNotImplemented(Opcode::BFI_reg);
+}
+
+void TranslatorVisitor::BFI_rc(u64) {
+    ThrowNotImplemented(Opcode::BFI_rc);
+}
+
+void TranslatorVisitor::BFI_cr(u64) {
+    ThrowNotImplemented(Opcode::BFI_cr);
+}
+
+void TranslatorVisitor::BFI_imm(u64) {
+    ThrowNotImplemented(Opcode::BFI_imm);
+}
+
+void TranslatorVisitor::BPT(u64) {
+    ThrowNotImplemented(Opcode::BPT);
+}
+
+void TranslatorVisitor::BRA(u64) {
+    ThrowNotImplemented(Opcode::BRA);
+}
+
+void TranslatorVisitor::BRK(u64) {
+    ThrowNotImplemented(Opcode::BRK);
+}
+
+void TranslatorVisitor::BRX(u64) {
+    ThrowNotImplemented(Opcode::BRX);
+}
+
+void TranslatorVisitor::CAL(u64) {
+    ThrowNotImplemented(Opcode::CAL);
+}
+
+void TranslatorVisitor::CCTL(u64) {
+    ThrowNotImplemented(Opcode::CCTL);
+}
+
+void TranslatorVisitor::CCTLL(u64) {
+    ThrowNotImplemented(Opcode::CCTLL);
+}
+
+void TranslatorVisitor::CONT(u64) {
+    ThrowNotImplemented(Opcode::CONT);
+}
+
+void TranslatorVisitor::CS2R(u64) {
+    ThrowNotImplemented(Opcode::CS2R);
+}
+
+void TranslatorVisitor::CSET(u64) {
+    ThrowNotImplemented(Opcode::CSET);
+}
+
+void TranslatorVisitor::CSETP(u64) {
+    ThrowNotImplemented(Opcode::CSETP);
+}
+
+void TranslatorVisitor::DADD_reg(u64) {
+    ThrowNotImplemented(Opcode::DADD_reg);
+}
+
+void TranslatorVisitor::DADD_cbuf(u64) {
+    ThrowNotImplemented(Opcode::DADD_cbuf);
+}
+
+void TranslatorVisitor::DADD_imm(u64) {
+    ThrowNotImplemented(Opcode::DADD_imm);
+}
+
+void TranslatorVisitor::DEPBAR(u64) {
+    ThrowNotImplemented(Opcode::DEPBAR);
+}
+
+void TranslatorVisitor::DFMA_reg(u64) {
+    ThrowNotImplemented(Opcode::DFMA_reg);
+}
+
+void TranslatorVisitor::DFMA_rc(u64) {
+    ThrowNotImplemented(Opcode::DFMA_rc);
+}
+
+void TranslatorVisitor::DFMA_cr(u64) {
+    ThrowNotImplemented(Opcode::DFMA_cr);
+}
+
+void TranslatorVisitor::DFMA_imm(u64) {
+    ThrowNotImplemented(Opcode::DFMA_imm);
+}
+
+void TranslatorVisitor::DMNMX_reg(u64) {
+    ThrowNotImplemented(Opcode::DMNMX_reg);
+}
+
+void TranslatorVisitor::DMNMX_cbuf(u64) {
+    ThrowNotImplemented(Opcode::DMNMX_cbuf);
+}
+
+void TranslatorVisitor::DMNMX_imm(u64) {
+    ThrowNotImplemented(Opcode::DMNMX_imm);
+}
+
+void TranslatorVisitor::DMUL_reg(u64) {
+    ThrowNotImplemented(Opcode::DMUL_reg);
+}
+
+void TranslatorVisitor::DMUL_cbuf(u64) {
+    ThrowNotImplemented(Opcode::DMUL_cbuf);
+}
+
+void TranslatorVisitor::DMUL_imm(u64) {
+    ThrowNotImplemented(Opcode::DMUL_imm);
+}
+
+void TranslatorVisitor::DSET_reg(u64) {
+    ThrowNotImplemented(Opcode::DSET_reg);
+}
+
+void TranslatorVisitor::DSET_cbuf(u64) {
+    ThrowNotImplemented(Opcode::DSET_cbuf);
+}
+
+void TranslatorVisitor::DSET_imm(u64) {
+    ThrowNotImplemented(Opcode::DSET_imm);
+}
+
+void TranslatorVisitor::DSETP_reg(u64) {
+    ThrowNotImplemented(Opcode::DSETP_reg);
+}
+
+void TranslatorVisitor::DSETP_cbuf(u64) {
+    ThrowNotImplemented(Opcode::DSETP_cbuf);
+}
+
+void TranslatorVisitor::DSETP_imm(u64) {
+    ThrowNotImplemented(Opcode::DSETP_imm);
+}
+
+void TranslatorVisitor::EXIT(u64) {
+    throw LogicError("Visiting EXIT instruction");
+}
+
+void TranslatorVisitor::F2F_reg(u64) {
+    ThrowNotImplemented(Opcode::F2F_reg);
+}
+
+void TranslatorVisitor::F2F_cbuf(u64) {
+    ThrowNotImplemented(Opcode::F2F_cbuf);
+}
+
+void TranslatorVisitor::F2F_imm(u64) {
+    ThrowNotImplemented(Opcode::F2F_imm);
+}
+
+void TranslatorVisitor::FADD_reg(u64) {
+    ThrowNotImplemented(Opcode::FADD_reg);
+}
+
+void TranslatorVisitor::FADD_cbuf(u64) {
+    ThrowNotImplemented(Opcode::FADD_cbuf);
+}
+
+void TranslatorVisitor::FADD_imm(u64) {
+    ThrowNotImplemented(Opcode::FADD_imm);
+}
+
+void TranslatorVisitor::FADD32I(u64) {
+    ThrowNotImplemented(Opcode::FADD32I);
+}
+
+void TranslatorVisitor::FCHK_reg(u64) {
+    ThrowNotImplemented(Opcode::FCHK_reg);
+}
+
+void TranslatorVisitor::FCHK_cbuf(u64) {
+    ThrowNotImplemented(Opcode::FCHK_cbuf);
+}
+
+void TranslatorVisitor::FCHK_imm(u64) {
+    ThrowNotImplemented(Opcode::FCHK_imm);
+}
+
+void TranslatorVisitor::FCMP_reg(u64) {
+    ThrowNotImplemented(Opcode::FCMP_reg);
+}
+
+void TranslatorVisitor::FCMP_rc(u64) {
+    ThrowNotImplemented(Opcode::FCMP_rc);
+}
+
+void TranslatorVisitor::FCMP_cr(u64) {
+    ThrowNotImplemented(Opcode::FCMP_cr);
+}
+
+void TranslatorVisitor::FCMP_imm(u64) {
+    ThrowNotImplemented(Opcode::FCMP_imm);
+}
+
+void TranslatorVisitor::FFMA_reg(u64) {
+    ThrowNotImplemented(Opcode::FFMA_reg);
+}
+
+void TranslatorVisitor::FFMA_rc(u64) {
+    ThrowNotImplemented(Opcode::FFMA_rc);
+}
+
+void TranslatorVisitor::FFMA_cr(u64) {
+    ThrowNotImplemented(Opcode::FFMA_cr);
+}
+
+void TranslatorVisitor::FFMA_imm(u64) {
+    ThrowNotImplemented(Opcode::FFMA_imm);
+}
+
+void TranslatorVisitor::FFMA32I(u64) {
+    ThrowNotImplemented(Opcode::FFMA32I);
+}
+
+void TranslatorVisitor::FLO_reg(u64) {
+    ThrowNotImplemented(Opcode::FLO_reg);
+}
+
+void TranslatorVisitor::FLO_cbuf(u64) {
+    ThrowNotImplemented(Opcode::FLO_cbuf);
+}
+
+void TranslatorVisitor::FLO_imm(u64) {
+    ThrowNotImplemented(Opcode::FLO_imm);
+}
+
+void TranslatorVisitor::FMNMX_reg(u64) {
+    ThrowNotImplemented(Opcode::FMNMX_reg);
+}
+
+void TranslatorVisitor::FMNMX_cbuf(u64) {
+    ThrowNotImplemented(Opcode::FMNMX_cbuf);
+}
+
+void TranslatorVisitor::FMNMX_imm(u64) {
+    ThrowNotImplemented(Opcode::FMNMX_imm);
+}
+
+void TranslatorVisitor::FMUL_reg(u64) {
+    ThrowNotImplemented(Opcode::FMUL_reg);
+}
+
+void TranslatorVisitor::FMUL_cbuf(u64) {
+    
ThrowNotImplemented(Opcode::FMUL_cbuf); +} + +void TranslatorVisitor::FMUL_imm(u64) { + ThrowNotImplemented(Opcode::FMUL_imm); +} + +void TranslatorVisitor::FMUL32I(u64) { + ThrowNotImplemented(Opcode::FMUL32I); +} + +void TranslatorVisitor::FSET_reg(u64) { + ThrowNotImplemented(Opcode::FSET_reg); +} + +void TranslatorVisitor::FSET_cbuf(u64) { + ThrowNotImplemented(Opcode::FSET_cbuf); +} + +void TranslatorVisitor::FSET_imm(u64) { + ThrowNotImplemented(Opcode::FSET_imm); +} + +void TranslatorVisitor::FSETP_reg(u64) { + ThrowNotImplemented(Opcode::FSETP_reg); +} + +void TranslatorVisitor::FSETP_cbuf(u64) { + ThrowNotImplemented(Opcode::FSETP_cbuf); +} + +void TranslatorVisitor::FSETP_imm(u64) { + ThrowNotImplemented(Opcode::FSETP_imm); +} + +void TranslatorVisitor::FSWZADD(u64) { + ThrowNotImplemented(Opcode::FSWZADD); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::HADD2_reg(u64) { + ThrowNotImplemented(Opcode::HADD2_reg); +} + +void TranslatorVisitor::HADD2_cbuf(u64) { + ThrowNotImplemented(Opcode::HADD2_cbuf); +} + +void TranslatorVisitor::HADD2_imm(u64) { + ThrowNotImplemented(Opcode::HADD2_imm); +} + +void TranslatorVisitor::HADD2_32I(u64) { + ThrowNotImplemented(Opcode::HADD2_32I); +} + +void TranslatorVisitor::HFMA2_reg(u64) { + ThrowNotImplemented(Opcode::HFMA2_reg); +} + +void TranslatorVisitor::HFMA2_rc(u64) { + ThrowNotImplemented(Opcode::HFMA2_rc); +} + +void TranslatorVisitor::HFMA2_cr(u64) { + ThrowNotImplemented(Opcode::HFMA2_cr); +} + +void TranslatorVisitor::HFMA2_imm(u64) { + ThrowNotImplemented(Opcode::HFMA2_imm); +} + +void TranslatorVisitor::HFMA2_32I(u64) { + ThrowNotImplemented(Opcode::HFMA2_32I); +} + +void TranslatorVisitor::HMUL2_reg(u64) { + ThrowNotImplemented(Opcode::HMUL2_reg); +} + +void TranslatorVisitor::HMUL2_cbuf(u64) { + ThrowNotImplemented(Opcode::HMUL2_cbuf); +} + +void TranslatorVisitor::HMUL2_imm(u64) { + ThrowNotImplemented(Opcode::HMUL2_imm); +} + +void TranslatorVisitor::HMUL2_32I(u64) { + ThrowNotImplemented(Opcode::HMUL2_32I); +} + +void TranslatorVisitor::HSET2_reg(u64) { + ThrowNotImplemented(Opcode::HSET2_reg); +} + +void TranslatorVisitor::HSET2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSET2_cbuf); +} + +void TranslatorVisitor::HSET2_imm(u64) { + ThrowNotImplemented(Opcode::HSET2_imm); +} + +void TranslatorVisitor::HSETP2_reg(u64) { + ThrowNotImplemented(Opcode::HSETP2_reg); +} + +void TranslatorVisitor::HSETP2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSETP2_cbuf); +} + +void TranslatorVisitor::HSETP2_imm(u64) { + ThrowNotImplemented(Opcode::HSETP2_imm); +} + +void TranslatorVisitor::I2F_reg(u64) { + ThrowNotImplemented(Opcode::I2F_reg); +} + +void TranslatorVisitor::I2F_cbuf(u64) { + ThrowNotImplemented(Opcode::I2F_cbuf); +} + +void TranslatorVisitor::I2F_imm(u64) { + ThrowNotImplemented(Opcode::I2F_imm); +} + +void TranslatorVisitor::I2I_reg(u64) { + ThrowNotImplemented(Opcode::I2I_reg); +} + +void TranslatorVisitor::I2I_cbuf(u64) { + ThrowNotImplemented(Opcode::I2I_cbuf); +} + +void TranslatorVisitor::I2I_imm(u64) { + ThrowNotImplemented(Opcode::I2I_imm); +} + +void TranslatorVisitor::IADD_reg(u64) { + ThrowNotImplemented(Opcode::IADD_reg); +} + +void TranslatorVisitor::IADD_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD_cbuf); +} + +void TranslatorVisitor::IADD_imm(u64) { + ThrowNotImplemented(Opcode::IADD_imm); +} + +void TranslatorVisitor::IADD3_reg(u64) { + 
ThrowNotImplemented(Opcode::IADD3_reg); +} + +void TranslatorVisitor::IADD3_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD3_cbuf); +} + +void TranslatorVisitor::IADD3_imm(u64) { + ThrowNotImplemented(Opcode::IADD3_imm); +} + +void TranslatorVisitor::IADD32I(u64) { + ThrowNotImplemented(Opcode::IADD32I); +} + +void TranslatorVisitor::ICMP_reg(u64) { + ThrowNotImplemented(Opcode::ICMP_reg); +} + +void TranslatorVisitor::ICMP_rc(u64) { + ThrowNotImplemented(Opcode::ICMP_rc); +} + +void TranslatorVisitor::ICMP_cr(u64) { + ThrowNotImplemented(Opcode::ICMP_cr); +} + +void TranslatorVisitor::ICMP_imm(u64) { + ThrowNotImplemented(Opcode::ICMP_imm); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + ThrowNotImplemented(Opcode::IMAD_reg); +} + +void TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMNMX_reg(u64) { + ThrowNotImplemented(Opcode::IMNMX_reg); +} + +void TranslatorVisitor::IMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::IMNMX_cbuf); +} + +void TranslatorVisitor::IMNMX_imm(u64) { + ThrowNotImplemented(Opcode::IMNMX_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::ISBERD(u64) { + ThrowNotImplemented(Opcode::ISBERD); +} + +void TranslatorVisitor::ISCADD_reg(u64) { + ThrowNotImplemented(Opcode::ISCADD_reg); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + ThrowNotImplemented(Opcode::ISCADD_cbuf); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + ThrowNotImplemented(Opcode::ISCADD_imm); +} + +void TranslatorVisitor::ISCADD32I(u64) { + ThrowNotImplemented(Opcode::ISCADD32I); +} + +void TranslatorVisitor::ISET_reg(u64) { + ThrowNotImplemented(Opcode::ISET_reg); +} + +void TranslatorVisitor::ISET_cbuf(u64) { + ThrowNotImplemented(Opcode::ISET_cbuf); +} + +void TranslatorVisitor::ISET_imm(u64) { + ThrowNotImplemented(Opcode::ISET_imm); +} + +void TranslatorVisitor::ISETP_reg(u64) { + ThrowNotImplemented(Opcode::ISETP_reg); +} + +void TranslatorVisitor::ISETP_cbuf(u64) { + ThrowNotImplemented(Opcode::ISETP_cbuf); +} + +void TranslatorVisitor::ISETP_imm(u64) { + ThrowNotImplemented(Opcode::ISETP_imm); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::JMX(u64) { + ThrowNotImplemented(Opcode::JMX); +} + +void TranslatorVisitor::KIL(u64) { 
+ ThrowNotImplemented(Opcode::KIL); +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LDC(u64) { + ThrowNotImplemented(Opcode::LDC); +} + +void TranslatorVisitor::LDG(u64) { + ThrowNotImplemented(Opcode::LDG); +} + +void TranslatorVisitor::LDL(u64) { + ThrowNotImplemented(Opcode::LDL); +} + +void TranslatorVisitor::LDS(u64) { + ThrowNotImplemented(Opcode::LDS); +} + +void TranslatorVisitor::LEA_hi_reg(u64) { + ThrowNotImplemented(Opcode::LEA_hi_reg); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_hi_cbuf); +} + +void TranslatorVisitor::LEA_lo_reg(u64) { + ThrowNotImplemented(Opcode::LEA_lo_reg); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_lo_cbuf); +} + +void TranslatorVisitor::LEA_lo_imm(u64) { + ThrowNotImplemented(Opcode::LEA_lo_imm); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::LOP_reg(u64) { + ThrowNotImplemented(Opcode::LOP_reg); +} + +void TranslatorVisitor::LOP_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP_cbuf); +} + +void TranslatorVisitor::LOP_imm(u64) { + ThrowNotImplemented(Opcode::LOP_imm); +} + +void TranslatorVisitor::LOP3_reg(u64) { + ThrowNotImplemented(Opcode::LOP3_reg); +} + +void TranslatorVisitor::LOP3_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP3_cbuf); +} + +void TranslatorVisitor::LOP3_imm(u64) { + ThrowNotImplemented(Opcode::LOP3_imm); +} + +void TranslatorVisitor::LOP32I(u64) { + ThrowNotImplemented(Opcode::LOP32I); +} + +void TranslatorVisitor::MEMBAR(u64) { + ThrowNotImplemented(Opcode::MEMBAR); +} + +void TranslatorVisitor::MOV32I(u64) { + ThrowNotImplemented(Opcode::MOV32I); +} + +void TranslatorVisitor::NOP(u64) { + ThrowNotImplemented(Opcode::NOP); +} + +void TranslatorVisitor::OUT_reg(u64) { + ThrowNotImplemented(Opcode::OUT_reg); +} + +void TranslatorVisitor::OUT_cbuf(u64) { + ThrowNotImplemented(Opcode::OUT_cbuf); +} + +void TranslatorVisitor::OUT_imm(u64) { + ThrowNotImplemented(Opcode::OUT_imm); +} + +void TranslatorVisitor::P2R_reg(u64) { + ThrowNotImplemented(Opcode::P2R_reg); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + ThrowNotImplemented(Opcode::P2R_cbuf); +} + +void TranslatorVisitor::P2R_imm(u64) { + ThrowNotImplemented(Opcode::P2R_imm); +} + +void TranslatorVisitor::PBK(u64) { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT(u64) { + ThrowNotImplemented(Opcode::PCNT); +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PIXLD(u64) { + ThrowNotImplemented(Opcode::PIXLD); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::POPC_reg(u64) { + ThrowNotImplemented(Opcode::POPC_reg); +} + +void TranslatorVisitor::POPC_cbuf(u64) { + ThrowNotImplemented(Opcode::POPC_cbuf); +} + +void TranslatorVisitor::POPC_imm(u64) { + ThrowNotImplemented(Opcode::POPC_imm); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::PSET(u64) { + 
ThrowNotImplemented(Opcode::PSET); +} + +void TranslatorVisitor::PSETP(u64) { + ThrowNotImplemented(Opcode::PSETP); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::R2P_reg(u64) { + ThrowNotImplemented(Opcode::R2P_reg); +} + +void TranslatorVisitor::R2P_cbuf(u64) { + ThrowNotImplemented(Opcode::R2P_cbuf); +} + +void TranslatorVisitor::R2P_imm(u64) { + ThrowNotImplemented(Opcode::R2P_imm); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RED(u64) { + ThrowNotImplemented(Opcode::RED); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RRO_reg(u64) { + ThrowNotImplemented(Opcode::RRO_reg); +} + +void TranslatorVisitor::RRO_cbuf(u64) { + ThrowNotImplemented(Opcode::RRO_cbuf); +} + +void TranslatorVisitor::RRO_imm(u64) { + ThrowNotImplemented(Opcode::RRO_imm); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::S2R(u64) { + ThrowNotImplemented(Opcode::S2R); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SEL_reg(u64) { + ThrowNotImplemented(Opcode::SEL_reg); +} + +void TranslatorVisitor::SEL_cbuf(u64) { + ThrowNotImplemented(Opcode::SEL_cbuf); +} + +void TranslatorVisitor::SEL_imm(u64) { + ThrowNotImplemented(Opcode::SEL_imm); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SHF_l_reg(u64) { + ThrowNotImplemented(Opcode::SHF_l_reg); +} + +void TranslatorVisitor::SHF_l_imm(u64) { + ThrowNotImplemented(Opcode::SHF_l_imm); +} + +void TranslatorVisitor::SHF_r_reg(u64) { + ThrowNotImplemented(Opcode::SHF_r_reg); +} + +void TranslatorVisitor::SHF_r_imm(u64) { + ThrowNotImplemented(Opcode::SHF_r_imm); +} + +void TranslatorVisitor::SHFL(u64) { + ThrowNotImplemented(Opcode::SHFL); +} + +void TranslatorVisitor::SHL_reg(u64) { + ThrowNotImplemented(Opcode::SHL_reg); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + ThrowNotImplemented(Opcode::SHL_cbuf); +} + +void TranslatorVisitor::SHL_imm(u64) { + ThrowNotImplemented(Opcode::SHL_imm); +} + +void TranslatorVisitor::SHR_reg(u64) { + ThrowNotImplemented(Opcode::SHR_reg); +} + +void TranslatorVisitor::SHR_cbuf(u64) { + ThrowNotImplemented(Opcode::SHR_cbuf); +} + +void TranslatorVisitor::SHR_imm(u64) { + ThrowNotImplemented(Opcode::SHR_imm); +} + +void TranslatorVisitor::SSY(u64) { + ThrowNotImplemented(Opcode::SSY); +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STL(u64) { + ThrowNotImplemented(Opcode::STL); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::STS(u64) { + ThrowNotImplemented(Opcode::STS); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SULD(u64) { + ThrowNotImplemented(Opcode::SULD); +} + +void TranslatorVisitor::SURED(u64) { + ThrowNotImplemented(Opcode::SURED); +} + +void TranslatorVisitor::SUST(u64) { + ThrowNotImplemented(Opcode::SUST); +} + +void TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TEX(u64) { + ThrowNotImplemented(Opcode::TEX); +} + +void TranslatorVisitor::TEX_b(u64) { + ThrowNotImplemented(Opcode::TEX_b); +} + +void TranslatorVisitor::TEXS(u64) { + 
ThrowNotImplemented(Opcode::TEXS); +} + +void TranslatorVisitor::TLD(u64) { + ThrowNotImplemented(Opcode::TLD); +} + +void TranslatorVisitor::TLD_b(u64) { + ThrowNotImplemented(Opcode::TLD_b); +} + +void TranslatorVisitor::TLD4(u64) { + ThrowNotImplemented(Opcode::TLD4); +} + +void TranslatorVisitor::TLD4_b(u64) { + ThrowNotImplemented(Opcode::TLD4_b); +} + +void TranslatorVisitor::TLD4S(u64) { + ThrowNotImplemented(Opcode::TLD4S); +} + +void TranslatorVisitor::TLDS(u64) { + ThrowNotImplemented(Opcode::TLDS); +} + +void TranslatorVisitor::TMML(u64) { + ThrowNotImplemented(Opcode::TMML); +} + +void TranslatorVisitor::TMML_b(u64) { + ThrowNotImplemented(Opcode::TMML_b); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::TXD(u64) { + ThrowNotImplemented(Opcode::TXD); +} + +void TranslatorVisitor::TXD_b(u64) { + ThrowNotImplemented(Opcode::TXD_b); +} + +void TranslatorVisitor::TXQ(u64) { + ThrowNotImplemented(Opcode::TXQ); +} + +void TranslatorVisitor::TXQ_b(u64) { + ThrowNotImplemented(Opcode::TXQ_b); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void TranslatorVisitor::VMAD(u64) { + ThrowNotImplemented(Opcode::VMAD); +} + +void TranslatorVisitor::VMNMX(u64) { + ThrowNotImplemented(Opcode::VMNMX); +} + +void TranslatorVisitor::VOTE(u64) { + ThrowNotImplemented(Opcode::VOTE); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + ThrowNotImplemented(Opcode::VOTE_vtg); +} + +void TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); +} + +void TranslatorVisitor::VSETP(u64) { + ThrowNotImplemented(Opcode::VSETP); +} + +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +void TranslatorVisitor::XMAD_reg(u64) { + ThrowNotImplemented(Opcode::XMAD_reg); +} + +void TranslatorVisitor::XMAD_rc(u64) { + ThrowNotImplemented(Opcode::XMAD_rc); +} + +void TranslatorVisitor::XMAD_cr(u64) { + ThrowNotImplemented(Opcode::XMAD_cr); +} + +void TranslatorVisitor::XMAD_imm(u64) { + ThrowNotImplemented(Opcode::XMAD_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp new file mode 100644 index 000000000..7fa35ba3a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+union MOV {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<20, 8, IR::Reg> src_reg;
+    BitField<39, 4, u64> mask;
+};
+
+void CheckMask(MOV mov) {
+    if (mov.mask != 0xf) {
+        throw NotImplementedException("Non-full move mask");
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::MOV_reg(u64 insn) {
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, X(mov.src_reg));
+}
+
+void TranslatorVisitor::MOV_cbuf(u64 insn) {
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, GetCbuf(insn));
+}
+
+void TranslatorVisitor::MOV_imm(u64 insn) {
+    const MOV mov{insn};
+    CheckMask(mov);
+    X(mov.dest_reg, GetImm(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..66a306745
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+namespace Shader::Maxwell {
+
+template <auto visitor_method>
+static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
+    using MethodType = decltype(visitor_method);
+    if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
+        (visitor.*visitor_method)(pc, insn);
+    } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
+        (visitor.*visitor_method)(insn);
+    } else {
+        (visitor.*visitor_method)();
+    }
+}
+
+IR::Block Translate(Environment& env, const Flow::Block& flow_block) {
+    IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()};
+    TranslatorVisitor visitor{env, block};
+
+    const Location pc_end{flow_block.end};
+    Location pc{flow_block.begin};
+    while (pc != pc_end) {
+        const u64 insn{env.ReadInstruction(pc.Offset())};
+        const Opcode opcode{Decode(insn)};
+        switch (opcode) {
+#define INST(name, cute, mask)                                                                     \
+    case Opcode::name:                                                                             \
+        Invoke<&TranslatorVisitor::name>(visitor, pc, insn);                                       \
+        break;
+#include "shader_recompiler/frontend/maxwell/maxwell.inc"
+#undef INST
+        default:
+            throw LogicError("Invalid opcode {}", opcode);
+        }
+        ++pc;
+    }
+    return block;
+}
+
+} // namespace Shader::Maxwell
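For reference, the INST X-macro above stamps out one dispatch case per maxwell.inc entry. Assuming the entry for MOV_reg, the expansion is roughly:

    case Opcode::MOV_reg:
        Invoke<&TranslatorVisitor::MOV_reg>(visitor, pc, insn);
        break;

Invoke then selects the (pc, insn), (insn) or () overload of the visitor method at compile time through is_invocable_r_v.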
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..788742dea
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+
+namespace Shader::Maxwell {
+
+[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block);
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..bbaa412f6
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,23 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <ranges>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Block& block) {
+    // We iterate over the instructions in reverse order.
+    // This is because removing an instruction reduces the number of uses for earlier instructions.
+    for (IR::Inst& inst : std::views::reverse(block)) {
+        if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
+            inst.Invalidate();
+        }
+    }
+}
+
+} // namespace Shader::Optimization
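Reverse iteration lets a single sweep collect whole dead chains: invalidating a use-free instruction releases the uses of its arguments, so their producers are already use-free by the time the walk reaches them. A hypothetical example (the value numbers and the IAdd32 opcode are illustrative):

    %10 = GetRegister R3
    %11 = IAdd32 %10, %10    // never used: invalidated first, which drops the
                             // last uses of %10, so %10 is invalidated later
                             // in the same sweep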
diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
new file mode 100644
index 000000000..21b8526cd
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp
@@ -0,0 +1,87 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+using Iterator = IR::Block::iterator;
+
+enum class TrackingType {
+    Reg,
+};
+
+struct RegisterInfo {
+    IR::Value register_value;
+    TrackingType tracking_type;
+    Iterator last_set_instruction;
+    bool set_instruction_present = false;
+};
+
+void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst,
+           TrackingType tracking_type) {
+    if (info.set_instruction_present) {
+        info.last_set_instruction->Invalidate();
+        block.Instructions().erase(info.last_set_instruction);
+    }
+    info.register_value = value;
+    info.tracking_type = tracking_type;
+    info.set_instruction_present = true;
+    info.last_set_instruction = set_inst;
+}
+
+RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) {
+    RegisterInfo info{};
+    info.register_value = IR::Value{&*get_inst};
+    info.tracking_type = tracking_type;
+    return info;
+}
+
+void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
+    if (info.register_value.IsEmpty()) {
+        info = Nothing(get_inst, tracking_type);
+        return;
+    }
+    if (info.tracking_type == tracking_type) {
+        get_inst->ReplaceUsesWith(info.register_value);
+        return;
+    }
+    info = Nothing(get_inst, tracking_type);
+}
+} // Anonymous namespace
+
+void GetSetElimination(IR::Block& block) {
+    std::array<RegisterInfo, 255> reg_info;
+
+    for (Iterator inst = block.begin(); inst != block.end(); ++inst) {
+        switch (inst->Opcode()) {
+        case IR::Opcode::GetRegister: {
+            const IR::Reg reg{inst->Arg(0).Reg()};
+            if (reg == IR::Reg::RZ) {
+                break;
+            }
+            const size_t index{static_cast<size_t>(reg)};
+            DoGet(reg_info.at(index), inst, TrackingType::Reg);
+            break;
+        }
+        case IR::Opcode::SetRegister: {
+            const IR::Reg reg{inst->Arg(0).Reg()};
+            if (reg == IR::Reg::RZ) {
+                break;
+            }
+            const size_t index{static_cast<size_t>(reg)};
+            DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg);
+            break;
+        }
+        default:
+            break;
+        }
+    }
+}
+
+} // namespace Shader::Optimization
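Within a single block the pass forwards the most recent SetRegister value into later GetRegister instructions and deletes sets that are overwritten before the block ends. Roughly (value numbers illustrative):

    SetRegister R2, %5
    %7 = GetRegister R2    // uses of %7 are replaced with %5
    SetRegister R2, %9     // the SetRegister R2, %5 above is erased here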
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..f9bb063fb
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,37 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void IdentityRemovalPass(IR::Block& block) {
+    std::vector<IR::Inst*> to_invalidate;
+
+    for (auto inst = block.begin(); inst != block.end();) {
+        const size_t num_args{inst->NumArgs()};
+        for (size_t i = 0; i < num_args; ++i) {
+            IR::Value arg;
+            while ((arg = inst->Arg(i)).IsIdentity()) {
+                inst->SetArg(i, arg.Inst()->Arg(0));
+            }
+        }
+        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+            to_invalidate.push_back(&*inst);
+            inst = block.Instructions().erase(inst);
+        } else {
+            ++inst;
+        }
+    }
+
+    for (IR::Inst* const inst : to_invalidate) {
+        inst->Invalidate();
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..fe5454e9a
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,16 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::Optimization {
+
+void DeadCodeEliminationPass(IR::Block& block);
+void GetSetElimination(IR::Block& block);
+void IdentityRemovalPass(IR::Block& block);
+void VerificationPass(const IR::Block& block);
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..36d9ae39b
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+static void ValidateTypes(const IR::Block& block) {
+    for (const IR::Inst& inst : block) {
+        const size_t num_args{inst.NumArgs()};
+        for (size_t i = 0; i < num_args; ++i) {
+            const IR::Type t1{inst.Arg(i).Type()};
+            const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+            if (!IR::AreTypesCompatible(t1, t2)) {
+                throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+            }
+        }
+    }
+}
+
+static void ValidateUses(const IR::Block& block) {
+    std::map<IR::Inst*, int> actual_uses;
+    for (const IR::Inst& inst : block) {
+        const size_t num_args{inst.NumArgs()};
+        for (size_t i = 0; i < num_args; ++i) {
+            const IR::Value arg{inst.Arg(i)};
+            if (!arg.IsImmediate()) {
+                ++actual_uses[arg.Inst()];
+            }
+        }
+    }
+    for (const auto [inst, uses] : actual_uses) {
+        if (inst->UseCount() != uses) {
+            throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+        }
+    }
+}
+
+void VerificationPass(const IR::Block& block) {
+    ValidateTypes(block);
+    ValidateUses(block);
+}
+
+} // namespace Shader::Optimization
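IdentityRemovalPass chases chains of Identity instructions so that every consumer ends up pointing at the real producer, then unlinks and invalidates the identities themselves. Sketch (value numbers and the IAdd32 opcode are illustrative):

    %3 = Identity %2
    %4 = Identity %3
    %5 = IAdd32 %4, %1    // argument rewritten in place to:  %5 = IAdd32 %2, %1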
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
new file mode 100644
index 000000000..39f0bf333
--- /dev/null
+++ b/src/shader_recompiler/main.cpp
@@ -0,0 +1,60 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <filesystem>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/file_environment.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/decode.h"
+#include "shader_recompiler/frontend/maxwell/location.h"
+#include "shader_recompiler/frontend/maxwell/program.h"
+#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+
+using namespace Shader;
+using namespace Shader::Maxwell;
+
+template <typename Func>
+static void ForEachFile(const std::filesystem::path& path, Func&& func) {
+    std::filesystem::directory_iterator end;
+    for (std::filesystem::directory_iterator it{path}; it != end; ++it) {
+        if (std::filesystem::is_directory(*it)) {
+            ForEachFile(*it, func);
+        } else {
+            func(*it);
+        }
+    }
+}
+
+void RunDatabase() {
+    std::vector<std::unique_ptr<FileEnvironment>> map;
+    ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) {
+        map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str()));
+    });
+    for (int i = 0; i < 1; ++i) {
+        for (auto& env : map) {
+            // fmt::print(stdout, "Decoding {}\n", path.string());
+            const Location start_address{0};
+            auto cfg{std::make_unique<Flow::CFG>(*env, start_address)};
+            // fmt::print(stdout, "{}\n", cfg.Dot());
+            // IR::Program program{env, cfg};
+            // Optimize(program);
+            // const std::string code{EmitGLASM(program)};
+        }
+    }
+}
+
+int main() {
+    // RunDatabase();
+
+    FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
+    auto cfg{std::make_unique<Flow::CFG>(env, 0)};
+    // fmt::print(stdout, "{}\n", cfg->Dot());
+
+    Program program{env, *cfg};
+    fmt::print(stdout, "{}\n", DumpProgram(program));
+}
From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 2 Feb 2021 21:07:00 -0300
Subject: [PATCH 003/770] shader: SSA and dominance

---
 src/shader_recompiler/CMakeLists.txt          |   3 +
 .../frontend/ir/basic_block.cpp               |  49 ++++--
 .../frontend/ir/basic_block.h                 |  20 ++-
 .../frontend/ir/function.cpp                  |   5 +
 src/shader_recompiler/frontend/ir/function.h  |  25 +++
 .../frontend/ir/microinstruction.cpp          |  22 +++
 .../frontend/ir/microinstruction.h            |  10 ++
 src/shader_recompiler/frontend/ir/opcode.inc  |   8 +
 src/shader_recompiler/frontend/ir/pred.h      |   7 +
 src/shader_recompiler/frontend/ir/reg.h       |   9 +-
 src/shader_recompiler/frontend/ir/value.cpp   |  37 +++++
 src/shader_recompiler/frontend/ir/value.h     |   3 +
 .../frontend/maxwell/control_flow.cpp         | 130 ++++++++++++++-
 .../frontend/maxwell/control_flow.h           |  44 ++++-
 .../frontend/maxwell/program.cpp              |  75 +++++----
 .../frontend/maxwell/program.h                |  11 +-
 .../frontend/maxwell/termination_code.cpp     |   7 +
 .../frontend/maxwell/termination_code.h       |   1 +
 .../frontend/maxwell/translate/impl/impl.h    |   4 +-
 .../translate/impl/not_implemented.cpp        |   6 +-
 .../ir_opt/identity_removal_pass.cpp          |   1 -
 src/shader_recompiler/ir_opt/passes.h         |   9 +
 .../ir_opt/ssa_rewrite_pass.cpp               | 155 ++++++++++++++++++
 src/shader_recompiler/main.cpp                |   4 +-
 24 files changed, 569 insertions(+), 76 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/ir/function.cpp
 create mode 100644 src/shader_recompiler/frontend/ir/function.h
 create mode 100644 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index c65846bc4..36a61f21a 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -11,6 +11,8 @@ add_executable(shader_recompiler
     frontend/ir/condition.h
     frontend/ir/flow_test.cpp
     frontend/ir/flow_test.h
+    frontend/ir/function.cpp
+    frontend/ir/function.h
     frontend/ir/ir_emitter.cpp
     frontend/ir/ir_emitter.h
     frontend/ir/microinstruction.cpp
@@ -51,6 +53,7 @@ add_executable(shader_recompiler
     ir_opt/get_set_elimination_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/passes.h
+    ir_opt/ssa_rewrite_pass.cpp
     ir_opt/verification_pass.cpp
     main.cpp
 )
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 0406726ad..e795618fc 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -37,6 +37,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
     return result_it;
 }
 
+void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) {
+    imm_predecessors.push_back(immediate_predecessor);
+}
+
 u32 Block::LocationBegin() const noexcept {
     return location_begin;
 }
@@ -53,6 +57,18 @@ const Block::InstructionList& Block::Instructions() const noexcept {
     return instructions;
 }
 
+std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept {
+    return imm_predecessors;
+}
+
+static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
+                                Block* block) {
+    if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
+        return fmt::format("{{Block ${}}}", it->second);
+    }
+    return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
+}
+
 static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
                               const std::map<const Inst*, size_t>& inst_to_index,
                               const Value& arg) {
@@ -60,10 +76,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind
         return "<null>";
     }
     if (arg.IsLabel()) {
-        if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) {
-            return fmt::format("{{Block ${}}}", it->second);
-        }
-        return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label()));
+        return BlockToIndex(block_to_index, arg.Label());
     }
     if (!arg.IsImmediate()) {
         if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) {
@@ -115,16 +128,26 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     } else {
         ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
     }
-    const size_t arg_count{NumArgsOf(op)};
-    for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
-        const Value arg{inst.Arg(arg_index)};
-        ret += arg_index != 0 ? ", " : " ";
-        ret += ArgToIndex(block_to_index, inst_to_index, arg);
+    if (op == Opcode::Phi) {
+        size_t val_index{0};
+        for (const auto& [phi_block, phi_val] : inst.PhiOperands()) {
+            ret += val_index != 0 ? ", " : " ";
+            ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val),
+                               BlockToIndex(block_to_index, phi_block));
+            ++val_index;
+        }
+    } else {
+        const size_t arg_count{NumArgsOf(op)};
+        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+            const Value arg{inst.Arg(arg_index)};
+            ret += arg_index != 0 ? ", " : " ";
", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3ed2eb957..4b6b80c4b 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -36,7 +38,11 @@ public: void AppendNewInst(Opcode op, std::initializer_list args); /// Prepends a new instruction to this basic block before the insertion point. - iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args); + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list args = {}); + + /// Adds a new immediate predecessor to the basic block. + void AddImmediatePredecessor(IR::Block* immediate_predecessor); /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; @@ -44,9 +50,12 @@ public: [[nodiscard]] u32 LocationEnd() const noexcept; /// Gets a mutable reference to the instruction list for this basic block. - InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept; /// Gets an immutable reference to the instruction list for this basic block. - const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept; + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span ImmediatePredecessors() const noexcept; [[nodiscard]] bool empty() const { return instructions.empty(); @@ -115,13 +124,16 @@ private: /// End location of this block u32 location_end; - /// List of instructions in this block. + /// List of instructions in this block InstructionList instructions; /// Memory pool for instruction list boost::fast_pool_allocator instruction_alloc_pool; + + /// Block immediate predecessors + std::vector imm_predecessors; }; [[nodiscard]] std::string DumpBlock(const Block& block); diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp new file mode 100644 index 000000000..d1fc9461d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.cpp @@ -0,0 +1,5 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h new file mode 100644 index 000000000..2d4dc5b98 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h
new file mode 100644
index 000000000..2d4dc5b98
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/function.h
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+
+namespace Shader::IR {
+
+struct Function {
+    struct InplaceDelete {
+        void operator()(IR::Block* block) const noexcept {
+            std::destroy_at(block);
+        }
+    };
+    using UniqueBlock = std::unique_ptr<IR::Block, InplaceDelete>;
+
+    std::vector<UniqueBlock> blocks;
+};
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 553fec3b7..ecf76e23d 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -30,6 +30,11 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode)
 
 bool Inst::MayHaveSideEffects() const noexcept {
     switch (op) {
+    case Opcode::Branch:
+    case Opcode::BranchConditional:
+    case Opcode::Exit:
+    case Opcode::Return:
+    case Opcode::Unreachable:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::WriteGlobalU8:
@@ -113,6 +118,17 @@ void Inst::SetArg(size_t index, Value value) {
     args[index] = value;
 }
 
+std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept {
+    return phi_operands;
+}
+
+void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
+    if (!value.IsImmediate()) {
+        Use(value);
+    }
+    phi_operands.emplace_back(predecessor, value);
+}
+
 void Inst::Invalidate() {
     ClearArgs();
     op = Opcode::Void;
@@ -125,6 +141,12 @@ void Inst::ClearArgs() {
         }
         value = {};
     }
+    for (auto& [phi_block, phi_op] : phi_operands) {
+        if (!phi_op.IsImmediate()) {
+            UndoUse(phi_op);
+        }
+    }
+    phi_operands.clear();
 }
 
 void Inst::ReplaceUsesWith(Value replacement) {
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 43460b950..7f1ed6710 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -5,6 +5,8 @@
 #pragma once
 
 #include <array>
+#include <span>
+#include <vector>
 
 #include <boost/intrusive/list.hpp>
 
@@ -15,6 +17,8 @@
 
 namespace Shader::IR {
 
+class Block;
+
 constexpr size_t MAX_ARG_COUNT = 4;
 
 class Inst : public boost::intrusive::list_base_hook<> {
@@ -59,6 +63,11 @@ public:
     /// Set the value of a given argument index.
     void SetArg(size_t index, Value value);
 
+    /// Get an immutable span to the phi operands.
+    [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept;
+    /// Add phi operand to a phi instruction.
+    void AddPhiOperand(Block* predecessor, const Value& value);
+
     void Invalidate();
     void ClearArgs();
 
@@ -76,6 +85,7 @@ private:
     Inst* carry_inst{};
     Inst* overflow_inst{};
     Inst* zsco_inst{};
+    std::vector<std::pair<Block*, Value>> phi_operands;
     u64 flags{};
 };
 
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 371064bf3..40759e96a 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -5,6 +5,7 @@
 // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
 OPCODE(Void,            Void,                                           )
 OPCODE(Identity,        Opaque,         Opaque,                         )
+OPCODE(Phi,             Opaque,         /*todo*/                        )
 
 // Control flow
 OPCODE(Branch,          Void,           Label,                          )
@@ -35,6 +36,13 @@ OPCODE(SetSFlag,        Void,           U1,                             )
 OPCODE(SetCFlag,        Void,           U1,                             )
 OPCODE(SetOFlag,        Void,           U1,                             )
 
+// Undefined
+OPCODE(Undef1,          U1,                                             )
+OPCODE(Undef8,          U8,                                             )
+OPCODE(Undef16,         U16,                                            )
+OPCODE(Undef32,         U32,                                            )
+OPCODE(Undef64,         U64,                                            )
+
 // Memory operations
 OPCODE(WriteGlobalU8,   Void,           U64,            U32,            )
 OPCODE(WriteGlobalS8,   Void,           U64,            U32,            )
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index 37cc53006..daf23193f 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -10,6 +10,13 @@ namespace Shader::IR {
 
 enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT };
 
+constexpr size_t NUM_USER_PREDS = 6;
+constexpr size_t NUM_PREDS = 7;
+
+[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
+    return static_cast<size_t>(pred);
+}
+
 } // namespace Shader::IR
 
 template <>
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
index 316fc4be8..771094eb9 100644
--- a/src/shader_recompiler/frontend/ir/reg.h
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -271,6 +271,9 @@ enum class Reg : u64 {
 };
 static_assert(static_cast<size_t>(Reg::RZ) == 255);
 
+constexpr size_t NUM_USER_REGS = 255;
+constexpr size_t NUM_REGS = 256;
+
 [[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
     if (reg == Reg::RZ) {
         // Adding or subtracting registers from RZ yields RZ
@@ -290,8 +293,12 @@ static_assert(static_cast<size_t>(Reg::RZ) == 255);
     return reg + (-num);
 }
 
+[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
+    return static_cast<size_t>(reg);
+}
+
 [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
-    return (static_cast<size_t>(reg) / align) * align == static_cast<size_t>(reg);
+    return (RegIndex(reg) / align) * align == RegIndex(reg);
 }
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 7b5b35d6c..1e974e88c 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -115,6 +115,43 @@ u64 Value::U64() const {
     return imm_u64;
 }
 
+bool Value::operator==(const Value& other) const {
+    if (type != other.type) {
+        return false;
+    }
+    switch (type) {
+    case Type::Void:
+        return true;
+    case Type::Opaque:
+        return inst == other.inst;
+    case Type::Label:
+        return label == other.label;
+    case Type::Reg:
+        return reg == other.reg;
+    case Type::Pred:
+        return pred == other.pred;
+    case Type::Attribute:
+        return attribute == other.attribute;
+    case Type::U1:
+        return imm_u1 == other.imm_u1;
+    case Type::U8:
+        return imm_u8 == other.imm_u8;
+    case Type::U16:
+        return imm_u16 == other.imm_u16;
+    case Type::U32:
+        return imm_u32 == other.imm_u32;
+    case Type::U64:
+        return imm_u64 == other.imm_u64;
+    case Type::ZSCO:
+        throw NotImplementedException("ZSCO comparison");
+    }
+    throw LogicError("Invalid type {}", type);
+}
+
+bool Value::operator!=(const Value& other) const {
+    return !operator==(other);
+}
+
 void Value::ValidateAccess(IR::Type expected) const {
     if (type != expected) {
         throw LogicError("Reading {} out of {}", expected, type);
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 664dacf9d..368119921 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -48,6 +48,9 @@ public:
     [[nodiscard]] u32 U32() const;
     [[nodiscard]] u64 U64() const;
 
+    [[nodiscard]] bool operator==(const Value& other) const;
+    [[nodiscard]] bool operator!=(const Value& other) const;
+
 private:
     void ValidateAccess(IR::Type expected) const;
 
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
index fc4dba826..21ee98137 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -36,6 +36,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
             .cond{true},
             .branch_true{new_id},
             .branch_false{UNREACHABLE_BLOCK_ID},
+            .imm_predecessors{},
         },
         Block{
             .begin{pc},
@@ -46,6 +47,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) {
             .cond{block.cond},
             .branch_true{block.branch_true},
             .branch_false{block.branch_false},
+            .imm_predecessors{},
         },
     };
 }
@@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) {
     }
 }
 
-static std::string Name(const Block& block) {
+static std::string NameOf(const Block& block) {
     if (block.begin.IsVirtual()) {
         return fmt::format("\"Virtual {}\"", block.id);
     } else {
@@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept {
 }
 
 Function::Function(Location start_address)
-    : entrypoint{start_address}, labels{Label{
+    : entrypoint{start_address}, labels{{
           .address{start_address},
          .block_id{0},
          .stack{},
      }} {}
 
+void Function::BuildBlocksMap() {
+    const size_t num_blocks{NumBlocks()};
+    blocks_map.resize(num_blocks);
+    for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
+        Block& block{blocks_data[block_index]};
+        blocks_map[block.id] = &block;
+    }
+}
+
+void Function::BuildImmediatePredecessors() {
+    for (const Block& block : blocks_data) {
+        if (block.branch_true != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_true]->imm_predecessors.push_back(block.id);
+        }
+        if (block.branch_false != UNREACHABLE_BLOCK_ID) {
+            blocks_map[block.branch_false]->imm_predecessors.push_back(block.id);
+        }
+    }
+}
+
+void Function::BuildPostOrder() {
+    boost::container::small_vector<BlockId, 16> block_stack;
+    post_order_map.resize(NumBlocks());
+
+    Block& first_block{blocks_data[blocks.front()]};
+    first_block.post_order_visited = true;
+    block_stack.push_back(first_block.id);
+
+    const auto visit_branch = [&](BlockId block_id, BlockId branch_id) {
+        if (branch_id == UNREACHABLE_BLOCK_ID) {
+            return false;
+        }
+        if (blocks_map[branch_id]->post_order_visited) {
+            return false;
+        }
+        blocks_map[branch_id]->post_order_visited = true;
+
+        // Calling push_back twice is faster than insert on msvc
+        block_stack.push_back(block_id);
+        block_stack.push_back(branch_id);
+        return true;
+    };
+    while (!block_stack.empty()) {
+        const Block* const block{blocks_map[block_stack.back()]};
+        block_stack.pop_back();
+
+        if (!visit_branch(block->id, block->branch_true) &&
+            !visit_branch(block->id, block->branch_false)) {
+            post_order_map[block->id] = static_cast<u32>(post_order_blocks.size());
+            post_order_blocks.push_back(block->id);
+        }
+    }
+}
+
+void Function::BuildImmediateDominators() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id};
+    auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })};
+    auto intersect{[this](Block* finger1, Block* finger2) {
+        while (finger1 != finger2) {
+            while (post_order_map[finger1->id] < post_order_map[finger2->id]) {
+                finger1 = finger1->imm_dominator;
+            }
+            while (post_order_map[finger2->id] < post_order_map[finger1->id]) {
+                finger2 = finger2->imm_dominator;
+            }
+        }
+        return finger1;
+    }};
+    for (Block& block : blocks_data) {
+        block.imm_dominator = nullptr;
+    }
+    Block* const start_block{&blocks_data[blocks.front()]};
+    start_block->imm_dominator = start_block;
+
+    bool changed{true};
+    while (changed) {
+        changed = false;
+        for (Block* const block : post_order_blocks | reverse_order_but_first) {
+            Block* new_idom{};
+            for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) {
+                new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor;
+            }
+            changed |= block->imm_dominator != new_idom;
+            block->imm_dominator = new_idom;
+        }
+    }
+}
+
+void Function::BuildDominanceFrontier() {
+    auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })};
+    auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }};
+    for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) {
+        for (Block* current : block.imm_predecessors | transform_block_id) {
+            while (current != block.imm_dominator) {
+                // The join node belongs to the dominance frontier of each block
+                // on the path up to the immediate dominator
+                current->dominance_frontiers.push_back(block.id);
+                current = current->imm_dominator;
+            }
+        }
+    }
+}
+
 CFG::CFG(Environment& env_, Location start_address) : env{env_} {
+    VisitFunctions(start_address);
+
+    for (Function& function : functions) {
+        function.BuildBlocksMap();
+        function.BuildImmediatePredecessors();
+        function.BuildPostOrder();
+        function.BuildImmediateDominators();
+        function.BuildDominanceFrontier();
+    }
+}
+
+void CFG::VisitFunctions(Location start_address) {
     functions.emplace_back(start_address);
     for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
         while (!functions[function_id].labels.empty()) {
@@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
         .cond{true},
         .branch_true{UNREACHABLE_BLOCK_ID},
        .branch_false{UNREACHABLE_BLOCK_ID},
+        .imm_predecessors{},
     };
     // Analyze instructions until it reaches an already visited block or there's a branch
     bool is_branch{false};
@@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati
     // Technically CAL pushes into PRET, but that's implicit in the function call for us
     // Insert the function into the list if it doesn't exist
     if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) {
-        functions.push_back(cal_pc);
+        functions.emplace_back(cal_pc);
     }
     // Handle CAL like a regular instruction
     break;
@@ -352,6 +469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc,
         .cond{cond},
         .branch_true{conditional_block_id},
         .branch_false{UNREACHABLE_BLOCK_ID},
+        .imm_predecessors{},
     })};
     // Set the end properties of the conditional instruction and give it a new identity
     Block& conditional_block{block};
@@ -465,14 +583,14 @@ std::string CFG::Dot() const {
         dot += fmt::format("\t\tnode [style=filled];\n");
         for (const u32 block_index : function.blocks) {
             const Block& block{function.blocks_data[block_index]};
-            const std::string name{Name(block)};
+            const std::string name{NameOf(block)};
             const auto add_branch = [&](BlockId branch_id, bool add_label) {
                 const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)};
                 dot += fmt::format("\t\t{}->", name);
                 if (it == function.blocks_data.end()) {
                    dot += fmt::format("\"Unknown label {}\"", branch_id);
                } else {
-                    dot += Name(*it);
+                    dot += NameOf(*it);
                };
                if (add_label && block.cond != true && block.cond != false) {
                    dot += fmt::format(" [label=\"{}\"]", block.cond);
@@ -520,7 +638,7 @@ std::string CFG::Dot() const {
     if (functions.front().blocks.empty()) {
         dot += "Start;\n";
     } else {
-        dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front()));
+        dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front()));
     }
     dot += fmt::format("\tStart [shape=diamond];\n");
 }
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index b2ab0cdc3..20ada8afd 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -70,6 +70,12 @@ struct Block {
     IR::Condition cond;
     BlockId branch_true;
     BlockId branch_false;
+    boost::container::small_vector<BlockId, 4> imm_predecessors;
+    boost::container::small_vector<BlockId, 4> dominance_frontiers;
+    union {
+        bool post_order_visited{false};
+        Block* imm_dominator;
+    };
 };
 
 struct Label {
@@ -81,11 +87,30 @@ struct Label {
 struct Function {
     Function(Location start_address);
 
+    void BuildBlocksMap();
+
+    void BuildImmediatePredecessors();
+
+    void BuildPostOrder();
+
+    void BuildImmediateDominators();
+
+    void BuildDominanceFrontier();
+
+    [[nodiscard]] size_t NumBlocks() const noexcept {
+        return static_cast<size_t>(current_block_id) + 1;
+    }
+
     Location entrypoint;
     BlockId current_block_id{0};
     boost::container::small_vector<Label, 16> labels;
     boost::container::small_vector<u32, 16> blocks;
     boost::container::small_vector<Block, 16> blocks_data;
+    // Translates from BlockId to block pointer
+    boost::container::small_vector<Block*, 16> blocks_map;
+
+    boost::container::small_vector<BlockId, 16> post_order_blocks;
+    boost::container::small_vector<u32, 16> post_order_map;
 };
 
 class CFG {
@@ -97,6 +122,12 @@ class CFG {
 public:
     explicit CFG(Environment& env, Location start_address);
 
+    CFG& operator=(const CFG&) = delete;
+    CFG(const CFG&) = delete;
+
+    CFG& operator=(CFG&&) = delete;
+    CFG(CFG&&) = delete;
+
     [[nodiscard]] std::string Dot() const;
 
     [[nodiscard]] std::span<const Function> Functions() const noexcept {
@@ -104,20 +135,22 @@ public:
     }
 
 private:
+    void VisitFunctions(Location start_address);
+
     void AnalyzeLabel(FunctionId function_id, Label& label);
 
     /// Inspect already visited blocks.
/// Return true when the block has already been visited - [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, - Instruction inst, Opcode opcode); + bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); @@ -126,8 +159,7 @@ private: AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, - FunctionId function_id); + BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); Environment& env; boost::container::small_vector functions; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 67a98ba57..49d1f4bfb 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,40 +8,53 @@ #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { +namespace { +void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span block_map, IR::Block* block_memory) { + const size_t num_blocks{cfg_function.blocks.size()}; + function.blocks.reserve(num_blocks); -Program::Function::~Function() { - std::ranges::for_each(blocks, &std::destroy_at); + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + + function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); + block_map[flow_block.id] = function.blocks.back().get(); + ++block_memory; + } } -Program::Program(Environment& env, const Flow::CFG& cfg) { +void EmitTerminationInsts(const Flow::Function& cfg_function, + std::span block_map) { + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } +} + +void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + IR::Block* block_memory) { std::vector block_map; + block_map.resize(cfg_function.blocks_data.size()); + + TranslateCode(env, cfg_function, function, block_map, block_memory); + EmitTerminationInsts(cfg_function, block_map); +} +} // Anonymous namespace + +Program::Program(Environment& env, const Flow::CFG& cfg) { functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - Function& function{functions.emplace_back()}; - - const size_t num_blocks{cfg_function.blocks.size()}; - IR::Block* 
block_memory{block_alloc_pool.allocate(num_blocks)}; - function.blocks.reserve(num_blocks); - - block_map.resize(cfg_function.blocks_data.size()); - - // Visit the instructions of all blocks - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; - ++block_memory; - function.blocks.push_back(block); - block_map[flow_block.id] = block; - } - // Now that all blocks are defined, emit the termination instructions - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } + TranslateFunction(env, cfg_function, functions.emplace_back(), + block_alloc_pool.allocate(cfg_function.blocks.size())); + } + std::ranges::for_each(functions, Optimization::SsaRewritePass); + for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); + Optimization::Invoke(Optimization::IdentityRemovalPass, function); + // Optimization::Invoke(Optimization::VerificationPass, function); } } @@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) { std::map inst_to_index; std::map block_to_index; - for (const Program::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); + for (const IR::Function& function : program.functions) { + for (const auto& block : function.blocks) { + block_to_index.emplace(block.get(), index); ++index; } } std::string ret; - for (const Program::Function& function : program.functions) { + for (const IR::Function& function : program.functions) { ret += fmt::format("Function\n"); - for (const IR::Block* const block : function.blocks) { + for (const auto& block : function.blocks) { ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } } diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 7814b2c01..36e678a9e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -4,13 +4,16 @@ #pragma once +#include #include #include +#include #include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" namespace Shader::Maxwell { @@ -22,16 +25,10 @@ public: explicit Program(Environment& env, const Flow::CFG& cfg); private: - struct Function { - ~Function(); - - std::vector blocks; - }; - boost::pool_allocator block_alloc_pool; - std::vector functions; + boost::container::small_vector functions; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp index a4ea5c5e3..ed5137f20 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { static void EmitBranch(const Flow::Block& flow_block, std::span block_map, IR::IREmitter& ir) { + const auto add_immediate_predecessor = [&](Flow::BlockId label) { + block_map[label]->AddImmediatePredecessor(&ir.block); + }; if (flow_block.cond == 
true) { + add_immediate_predecessor(flow_block.branch_true); return ir.Branch(block_map[flow_block.branch_true]); } if (flow_block.cond == false) { + add_immediate_predecessor(flow_block.branch_false); return ir.Branch(block_map[flow_block.branch_false]); } + add_immediate_predecessor(flow_block.branch_true); + add_immediate_predecessor(flow_block.branch_false); return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], block_map[flow_block.branch_false]); } diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h index b0d667942..04e044534 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -11,6 +11,7 @@ namespace Shader::Maxwell { +/// Emit termination instructions and collect immediate predecessors void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bc607b002..8be7d6ff1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -208,7 +208,7 @@ public: void P2R_reg(u64 insn); void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); - void PBK(u64 insn); + void PBK(); void PCNT(u64 insn); void PEXIT(u64 insn); void PIXLD(u64 insn); @@ -252,7 +252,7 @@ public: void SHR_reg(u64 insn); void SHR_cbuf(u64 insn); void SHR_imm(u64 insn); - void SSY(u64 insn); + void SSY(); void ST(u64 insn); void STG(u64 insn); void STL(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c907c1ffb..0f52696d1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) { ThrowNotImplemented(Opcode::P2R_imm); } -void TranslatorVisitor::PBK(u64) { +void TranslatorVisitor::PBK() { // PBK is a no-op } @@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) { ThrowNotImplemented(Opcode::SHR_imm); } -void TranslatorVisitor::SSY(u64) { - ThrowNotImplemented(Opcode::SSY); +void TranslatorVisitor::SSY() { + // SSY is a no-op } void TranslatorVisitor::ST(u64) { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index f9bb063fb..7f8500087 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -28,7 +28,6 @@ void IdentityRemovalPass(IR::Block& block) { ++inst; } } - for (IR::Inst* const inst : to_invalidate) { inst->Invalidate(); } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index fe5454e9a..83f094d73 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -5,12 +5,21 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { +template +void Invoke(Func&& func, IR::Function& function) { + for (const auto& block : function.blocks) { + func(*block); + } +} + void DeadCodeEliminationPass(IR::Block& block); void GetSetElimination(IR::Block& 
block); void IdentityRemovalPass(IR::Block& block); +void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..a4b256a40 --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. +// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. +// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +using ValueMap = boost::container::flat_map>; + +struct DefTable { + [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { + return regs[IR::RegIndex(variable)]; + } + + [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)]; + } + + std::array regs; + std::array preds; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::Undef32; +} + +IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::Undef1; +} + +[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { + return inst.Opcode() == IR::Opcode::Phi; +} + +class Pass { +public: + void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + current_def[variable].insert_or_assign(block, value); + } + + IR::Value ReadVariable(auto variable, IR::Block* block) { + auto& def{current_def[variable]}; + if (const auto it{def.find(block)}; it != def.end()) { + return it->second; + } + return ReadVariableRecursive(variable, block); + } + +private: + IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + IR::Value val; + if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + val = ReadVariable(variable, preds.front()); + } else { + // Break potential cycles with operandless phi + val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + WriteVariable(variable, block, val); + val = AddPhiOperands(variable, val, block); + } + WriteVariable(variable, block, val); + return val; + } + + IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + for (IR::Block* const pred : block->ImmediatePredecessors()) { + phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + for (const auto& pair : phi.Inst()->PhiOperands()) { + const IR::Value& op{pair.second}; + if (op == same || op == phi) { + // Unique value or self-reference + continue; + } + if 
(!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return phi; + } + same = op; + } + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + } + // Reroute all uses of phi to same and remove phi + phi.Inst()->ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + DefTable current_def; +}; +} // Anonymous namespace + +void SsaRewritePass(IR::Function& function) { + Pass pass; + for (const auto& block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + } + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 39f0bf333..e3c9ad6e8 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -35,12 +35,12 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique(path.string().c_str())); }); - for (int i = 0; i < 1; ++i) { + for (int i = 0; i < 300; ++i) { for (auto& env : map) { // fmt::print(stdout, "Decoding {}\n", path.string()); const Location start_address{0}; auto cfg{std::make_unique(*env, start_address)}; - // fmt::print(stdout, "{}\n", cfg.Dot()); + // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Feb 2021 16:43:04 -0300 Subject: [PATCH 004/770] shader: Initial instruction support --- src/shader_recompiler/CMakeLists.txt | 13 +- .../frontend/ir/basic_block.cpp | 4 +- .../frontend/ir/basic_block.h | 2 +- .../frontend/ir/ir_emitter.cpp | 200 ++++++++++++++++-- .../frontend/ir/ir_emitter.h | 67 +++++- .../frontend/ir/microinstruction.h | 12 +- src/shader_recompiler/frontend/ir/modifiers.h | 28 +++ src/shader_recompiler/frontend/ir/opcode.inc | 139 ++++++++---- src/shader_recompiler/frontend/ir/pred.h | 11 +- .../frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/common_encoding.h | 56 +++++ .../translate/impl/floating_point_add.cpp | 71 +++++++ .../floating_point_fused_multiply_add.cpp | 73 +++++++ .../impl/floating_point_multiply.cpp | 108 ++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 26 ++- .../frontend/maxwell/translate/impl/impl.h | 9 +- .../maxwell/translate/impl/integer_add.cpp | 106 ++++++++++ .../translate/impl/integer_scaled_add.cpp | 73 +++++++ .../translate/impl/integer_set_predicate.cpp | 99 +++++++++ .../translate/impl/integer_shift_left.cpp | 71 +++++++ 
.../impl/integer_short_multiply_add.cpp | 110 ++++++++++ .../translate/impl/load_store_memory.cpp | 153 +++++++++++--- .../{register_move.cpp => move_register.cpp} | 2 +- .../translate/impl/move_special_register.cpp | 114 ++++++++++ .../translate/impl/not_implemented.cpp | 149 +------------ .../ir_opt/get_set_elimination_pass.cpp | 87 -------- src/shader_recompiler/ir_opt/passes.h | 1 - src/shader_recompiler/main.cpp | 3 +- 28 files changed, 1452 insertions(+), 336 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/modifiers.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp rename src/shader_recompiler/frontend/maxwell/translate/impl/{register_move.cpp => move_register.cpp} (96%) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp delete mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 36a61f21a..f5dd4d29e 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -39,18 +39,27 @@ add_executable(shader_recompiler frontend/maxwell/program.h frontend/maxwell/termination_code.cpp frontend/maxwell/termination_code.h + frontend/maxwell/translate/impl/common_encoding.h + frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp + frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp + frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h + frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_scaled_add.cpp + frontend/maxwell/translate/impl/integer_set_predicate.cpp + frontend/maxwell/translate/impl/integer_shift_left.cpp + frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/not_implemented.cpp - frontend/maxwell/translate/impl/register_move.cpp + frontend/maxwell/translate/impl/move_register.cpp + frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/dead_code_elimination_pass.cpp - ir_opt/get_set_elimination_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e795618fc..249251dd0 100644 --- 
a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; + std::initializer_list args, u64 flags) { + Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 4b6b80c4b..ec4a41cb1 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -39,7 +39,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args = {}); + std::initializer_list args = {}, u64 flags = 0); /// Adds a new immediate predecessor to the basic block. void AddImmediatePredecessor(IR::Block* immediate_predecessor); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6450e4b2c..87b253c9a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { Inst(Opcode::SetAttribute, attribute, value); } +U32 IREmitter::WorkgroupIdX() { + return Inst(Opcode::WorkgroupIdX); +} + +U32 IREmitter::WorkgroupIdY() { + return Inst(Opcode::WorkgroupIdY); +} + +U32 IREmitter::WorkgroupIdZ() { + return Inst(Opcode::WorkgroupIdZ); +} + +U32 IREmitter::LocalInvocationIdX() { + return Inst(Opcode::LocalInvocationIdX); +} + +U32 IREmitter::LocalInvocationIdY() { + return Inst(Opcode::LocalInvocationIdY); +} + +U32 IREmitter::LocalInvocationIdZ() { + return Inst(Opcode::LocalInvocationIdZ); +} + +U32 IREmitter::LoadGlobalU8(const U64& address) { + return Inst(Opcode::LoadGlobalU8, address); +} + +U32 IREmitter::LoadGlobalS8(const U64& address) { + return Inst(Opcode::LoadGlobalS8, address); +} + +U32 IREmitter::LoadGlobalU16(const U64& address) { + return Inst(Opcode::LoadGlobalU16, address); +} + +U32 IREmitter::LoadGlobalS16(const U64& address) { + return Inst(Opcode::LoadGlobalS16, address); +} + +U32 IREmitter::LoadGlobal32(const U64& address) { + return Inst(Opcode::LoadGlobal32, address); +} + +Value IREmitter::LoadGlobal64(const U64& address) { + return Inst(Opcode::LoadGlobal64, address); +} + +Value IREmitter::LoadGlobal128(const U64& address) { + return Inst(Opcode::LoadGlobal128, address); +} + void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { Inst(Opcode::WriteGlobalU8, address, value); } @@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } -U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst(Opcode::FPAdd16, a, b); + return Inst(Opcode::FPAdd16, Flags{control}, a, b); case Type::U32: - return Inst(Opcode::FPAdd32, a, b); + return 
Inst(Opcode::FPAdd32, Flags{control}, a, b); case Type::U64: - return Inst(Opcode::FPAdd64, a, b); + return Inst(Opcode::FPAdd64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } @@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { if (e1.Type() != e2.Type()) { - throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } return Inst(Opcode::CompositeConstruct2, e1, e2); } Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); } return Inst(Opcode::CompositeConstruct3, e1, e2, e3); } @@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, const UAny& e4) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), - e4.Type()); + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); } return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); } @@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { return Inst(Opcode::CompositeExtract, vector, Imm32(static_cast(element))); } +UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U8: + return Inst(Opcode::Select8, condition, true_value, false_value); + case Type::U16: + return Inst(Opcode::Select16, condition, true_value, false_value); + case Type::U32: + return Inst(Opcode::Select32, condition, true_value, false_value); + case Type::U64: + return Inst(Opcode::Select64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + U64 IREmitter::PackUint2x32(const Value& vector) { return Inst(Opcode::PackUint2x32, vector); } @@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) { return Inst(Opcode::UnpackDouble2x32, value); } -U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst(Opcode::FPMul16, a, b); + return Inst(Opcode::FPMul16, Flags{control}, a, b); case Type::U32: - return Inst(Opcode::FPMul32, a, b); + return Inst(Opcode::FPMul32, Flags{control}, a, b); case Type::U64: - return Inst(Opcode::FPMul64, a, b); + return Inst(Opcode::FPMul64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch 
(a.Type()) { + case Type::U16: + return Inst(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::U32: + return Inst(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::U64: + return Inst(Opcode::FPFma64, Flags{control}, a, b, c); default: ThrowInvalidType(a.Type()); } @@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { } } +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::IAdd32, a, b); + case Type::U64: + return Inst(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst(Opcode::IMul32, a, b); +} + +U32 IREmitter::INeg(const U32& value) { + return Inst(Opcode::INeg32, value); +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst(Opcode::IAbs32, value); +} + +U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftLeftLogical32, base, shift); +} + +U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftRightLogical32, base, shift); +} + +U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftRightArithmetic32, base, shift); +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { + return Inst(Opcode::IEqual, lhs, rhs); +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? 
Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst(Opcode::LogicalOr, a, b); } @@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { return Inst(Opcode::LogicalAnd, a, b); } +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst(Opcode::LogicalXor, a, b); +} + U1 IREmitter::LogicalNot(const U1& value) { return Inst(Opcode::LogicalNot, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1af79f41c..7ff763ecf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -4,8 +4,12 @@ #pragma once +#include +#include + #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { @@ -52,6 +56,22 @@ public: [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const U32& value); + [[nodiscard]] U32 WorkgroupIdX(); + [[nodiscard]] U32 WorkgroupIdY(); + [[nodiscard]] U32 WorkgroupIdZ(); + + [[nodiscard]] U32 LocalInvocationIdX(); + [[nodiscard]] U32 LocalInvocationIdY(); + [[nodiscard]] U32 LocalInvocationIdZ(); + + [[nodiscard]] U32 LoadGlobalU8(const U64& address); + [[nodiscard]] U32 LoadGlobalS8(const U64& address); + [[nodiscard]] U32 LoadGlobalU16(const U64& address); + [[nodiscard]] U32 LoadGlobalS16(const U64& address); + [[nodiscard]] U32 LoadGlobal32(const U64& address); + [[nodiscard]] Value LoadGlobal64(const U64& address); + [[nodiscard]] Value LoadGlobal128(const U64& address); + void WriteGlobalU8(const U64& address, const U32& value); void WriteGlobalS8(const U64& address, const U32& value); void WriteGlobalU16(const U64& address, const U32& value); @@ -71,6 +91,8 @@ public: const UAny& e4); [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); @@ -80,8 +102,10 @@ public: [[nodiscard]] U64 PackDouble2x32(const Value& vector); [[nodiscard]] Value UnpackDouble2x32(const U64& value); - [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); - [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); + [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control = {}); [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); @@ -100,8 +124,31 @@ public: [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 INeg(const U32& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); + 
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalNot(const U1& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); @@ -118,6 +165,22 @@ private: auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } + + template + requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v) struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template + T Inst(Opcode op, Flags flags, Args... args) { + u64 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 7f1ed6710..61849695a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,7 +5,9 @@ #pragma once #include +#include #include +#include #include #include @@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_) noexcept : op(op_) {} + explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -73,6 +75,14 @@ public: void ReplaceUsesWith(Value replacement); + template + requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(&ret, &flags, sizeof(ret)); + return ret; + } + private: void Use(const Value& value); void UndoUse(const Value& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..28bb9e798 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
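+// FpControl below packs the per-operation floating-point modifiers (rounding mode,
+// denorm flush mode, contraction) into a trivially copyable struct that the IR
+// emitter copies into an instruction's 64-bit flags field.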
+ +#pragma once + +namespace Shader::IR { + +enum class FmzMode { + None, // Denorms are not flushed, NAN is propagated (nouveau) + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) +}; + +enum class FpRounding { + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero +}; + +struct FpControl { + bool no_contraction{false}; + FpRounding rounding : 8 = FpRounding::RN; + FmzMode fmz_mode : 8 = FmzMode::FTZ; +}; +static_assert(sizeof(FpControl) <= sizeof(u64)); +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 40759e96a..4ecb5e936 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1, OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupIdX, U32, ) +OPCODE(WorkgroupIdY, U32, ) +OPCODE(WorkgroupIdZ, U32, ) +OPCODE(LocalInvocationIdX, U32, ) +OPCODE(LocalInvocationIdY, U32, ) +OPCODE(LocalInvocationIdZ, U32, ) // Undefined OPCODE(Undef1, U1, ) @@ -44,6 +50,13 @@ OPCODE(Undef32, U32, OPCODE(Undef64, U64, ) // Memory operations +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, Opaque, U64, ) +OPCODE(LoadGlobal128, Opaque, U64, ) OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, ) @@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeExtract, Opaque, Opaque, U32, ) +// Select operations +OPCODE(Select8, U8, U1, U8, U8, ) +OPCODE(Select16, U16, U1, U16, U16, ) +OPCODE(Select32, U32, U1, U32, U32, ) +OPCODE(Select64, U64, U1, U64, U64, ) + // Bitwise conversions OPCODE(PackUint2x32, U64, Opaque, ) OPCODE(UnpackUint2x32, Opaque, U64, ) @@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, U16, U16 ) -OPCODE(FPAbs32, U32, U32 ) -OPCODE(FPAbs64, U64, U64 ) -OPCODE(FPAdd16, U16, U16, U16 ) -OPCODE(FPAdd32, U32, U32, U32 ) -OPCODE(FPAdd64, U64, U64, U64 ) -OPCODE(FPFma16, U16, U16, U16 ) -OPCODE(FPFma32, U32, U32, U32 ) -OPCODE(FPFma64, U64, U64, U64 ) -OPCODE(FPMax32, U32, U32, U32 ) -OPCODE(FPMax64, U64, U64, U64 ) -OPCODE(FPMin32, U32, U32, U32 ) -OPCODE(FPMin64, U64, U64, U64 ) -OPCODE(FPMul16, U16, U16, U16 ) -OPCODE(FPMul32, U32, U32, U32 ) -OPCODE(FPMul64, U64, U64, U64 ) -OPCODE(FPNeg16, U16, U16 ) -OPCODE(FPNeg32, U32, U32 ) -OPCODE(FPNeg64, U64, U64 ) -OPCODE(FPRecip32, U32, U32 ) -OPCODE(FPRecip64, U64, U64 ) -OPCODE(FPRecipSqrt32, U32, U32 ) -OPCODE(FPRecipSqrt64, U64, U64 ) -OPCODE(FPSqrt, U32, U32 ) -OPCODE(FPSin, U32, U32 ) -OPCODE(FPSinNotReduced, U32, U32 ) -OPCODE(FPExp2, U32, U32 ) -OPCODE(FPExp2NotReduced, U32, U32 ) -OPCODE(FPCos, U32, U32 ) -OPCODE(FPCosNotReduced, U32, U32 ) -OPCODE(FPLog2, U32, U32 ) -OPCODE(FPSaturate16, U16, U16 ) -OPCODE(FPSaturate32, U32, U32 ) -OPCODE(FPSaturate64, U64, U64 ) -OPCODE(FPRoundEven16, U16, U16 ) -OPCODE(FPRoundEven32, U32, U32 ) -OPCODE(FPRoundEven64, U64, U64 ) -OPCODE(FPFloor16, U16, U16 ) -OPCODE(FPFloor32, U32, U32 ) -OPCODE(FPFloor64, U64, U64 ) -OPCODE(FPCeil16, U16, 
U16 ) -OPCODE(FPCeil32, U32, U32 ) -OPCODE(FPCeil64, U64, U64 ) -OPCODE(FPTrunc16, U16, U16 ) -OPCODE(FPTrunc32, U32, U32 ) -OPCODE(FPTrunc64, U64, U64 ) +OPCODE(FPAbs16, U16, U16, ) +OPCODE(FPAbs32, U32, U32, ) +OPCODE(FPAbs64, U64, U64, ) +OPCODE(FPAdd16, U16, U16, U16, ) +OPCODE(FPAdd32, U32, U32, U32, ) +OPCODE(FPAdd64, U64, U64, U64, ) +OPCODE(FPFma16, U16, U16, U16, U16, ) +OPCODE(FPFma32, U32, U32, U32, U32, ) +OPCODE(FPFma64, U64, U64, U64, U64, ) +OPCODE(FPMax32, U32, U32, U32, ) +OPCODE(FPMax64, U64, U64, U64, ) +OPCODE(FPMin32, U32, U32, U32, ) +OPCODE(FPMin64, U64, U64, U64, ) +OPCODE(FPMul16, U16, U16, U16, ) +OPCODE(FPMul32, U32, U32, U32, ) +OPCODE(FPMul64, U64, U64, U64, ) +OPCODE(FPNeg16, U16, U16, ) +OPCODE(FPNeg32, U32, U32, ) +OPCODE(FPNeg64, U64, U64, ) +OPCODE(FPRecip32, U32, U32, ) +OPCODE(FPRecip64, U64, U64, ) +OPCODE(FPRecipSqrt32, U32, U32, ) +OPCODE(FPRecipSqrt64, U64, U64, ) +OPCODE(FPSqrt, U32, U32, ) +OPCODE(FPSin, U32, U32, ) +OPCODE(FPSinNotReduced, U32, U32, ) +OPCODE(FPExp2, U32, U32, ) +OPCODE(FPExp2NotReduced, U32, U32, ) +OPCODE(FPCos, U32, U32, ) +OPCODE(FPCosNotReduced, U32, U32, ) +OPCODE(FPLog2, U32, U32, ) +OPCODE(FPSaturate16, U16, U16, ) +OPCODE(FPSaturate32, U32, U32, ) +OPCODE(FPSaturate64, U64, U64, ) +OPCODE(FPRoundEven16, U16, U16, ) +OPCODE(FPRoundEven32, U32, U32, ) +OPCODE(FPRoundEven64, U64, U64, ) +OPCODE(FPFloor16, U16, U16, ) +OPCODE(FPFloor32, U32, U32, ) +OPCODE(FPFloor64, U64, U64, ) +OPCODE(FPCeil16, U16, U16, ) +OPCODE(FPCeil32, U32, U32, ) +OPCODE(FPCeil64, U64, U64, ) +OPCODE(FPTrunc16, U16, U16, ) +OPCODE(FPTrunc32, U32, U32, ) +OPCODE(FPTrunc64, U64, U64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) + +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) OPCODE(LogicalNot, U1, U1, ) // Conversion operations diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index daf23193f..c6f2f82bf 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -8,7 +8,16 @@ namespace Shader::IR { -enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +enum class Pred : u64 { + P0, + P1, + P2, + P3, + P4, + P5, + P6, + PT, +}; constexpr size_t NUM_USER_PREDS = 6; constexpr size_t NUM_PREDS = 7; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb..bd1f96c07 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ 
@@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::Invoke(Optimization::IdentityRemovalPass, function); // Optimization::Invoke(Optimization::VerificationPass, function); } + //*/ } std::string DumpProgram(const Program& program) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..3da37a2bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + return IR::FmzMode::FMZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..d2c44b9cc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (sat) { + throw NotImplementedException("FADD SAT"); + } + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + throw NotImplementedException("FADD (cbuf)"); +} + +void TranslatorVisitor::FADD_imm(u64) { + throw NotImplementedException("FADD (imm)"); +} + +void TranslatorVisitor::FADD32I(u64) { + throw NotImplementedException("FADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..30ca052ec --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (sat) { + throw NotImplementedException("FFMA SAT"); + } + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64) { + throw NotImplementedException("FFMA (rc)"); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64) { + throw NotImplementedException("FFMA (imm)"); +} + +void 
TranslatorVisitor::FFMA32I(u64) { + throw NotImplementedException("FFMA32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..743a1e2f0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + if (sat) { + throw NotImplementedException("FMUL SAT"); + } + IR::U32 op_a{v.X(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64) { + throw NotImplementedException("FMUL (cbuf)"); +} + +void TranslatorVisitor::FMUL_imm(u64) { + throw NotImplementedException("FMUL (imm)"); +} + +void TranslatorVisitor::FMUL32I(u64) { + throw NotImplementedException("FMUL32I"); +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f2..548c7f611 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -16,6 +16,22 
@@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::U32 TranslatorVisitor::GetImm(u64 insn) { +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; BitField<20, 19, u64> value; @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { return ir.Imm32(value); } +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast(imm.value)); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1..ef6d977fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -46,7 +46,7 @@ public: void DADD_reg(u64 insn); void DADD_cbuf(u64 insn); void DADD_imm(u64 insn); - void DEPBAR(u64 insn); + void DEPBAR(); void DFMA_reg(u64 insn); void DFMA_rc(u64 insn); void DFMA_cr(u64 insn); @@ -298,9 +298,14 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::U32 GetImm(u64 insn); + [[nodiscard]] IR::U32 GetImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..60f79b160 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? 
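+ // Until the TODOs above are verified against hardware, reject the ambiguous PO and X combinations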
+ if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? + if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + const bool neg_a{!po && iadd.neg_a != 0}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64) { + throw NotImplementedException("IADD (reg)"); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64) { + throw NotImplementedException("IADD (imm)"); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..f92c0bbd6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
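A note on the X and PO modifiers translated above: X turns IADD into an add with carry-in, which pairs with a CC-setting IADD to build additions wider than 32 bits, and .PO adds one to the sum. A minimal standalone sketch of the carry pairing on plain integers (illustrative only, not the recompiler's API):

    #include <cstdint>

    struct AddResult {
        uint32_t value;
        bool carry; // what the CC carry flag would capture
    };

    AddResult AddSetCC(uint32_t a, uint32_t b) {
        const uint32_t sum = a + b;
        return {sum, sum < a}; // unsigned wrap-around produces the carry
    }

    uint64_t Add64(uint64_t x, uint64_t y) {
        const AddResult lo = AddSetCC(static_cast<uint32_t>(x), static_cast<uint32_t>(y));
        // The IADD.X step: add the high words plus the carry from the low words
        const uint32_t hi = static_cast<uint32_t>(x >> 32) + static_cast<uint32_t>(y >> 32) +
                            (lo.carry ? 1u : 0u);
        return (static_cast<uint64_t>(hi) << 32) | lo.value;
    }

This is why the translation materializes the carry with a Select on GetCFlag before the second IAdd.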
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + const bool po{iscadd.three_for_po == 3}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (!po) { + // When PO is not present, the bits are interpreted as negation + if (iscadd.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iscadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + if (po) { + // .PO adds one to the final result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + v.X(iscadd.dest_reg, result); + + if (iscadd.cc != 0) { + throw NotImplementedException("ISCADD CC"); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> op_b; + } const iscadd{insn}; + + ISCADD(*this, insn, X(iscadd.op_b)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + throw NotImplementedException("ISCADD (cbuf)"); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + throw NotImplementedException("ISCADD (imm)"); +} + +void TranslatorVisitor::ISCADD32I(u64) { + throw NotImplementedException("ISCADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..76c6b5291 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
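The scaled add above computes (op_a << scale) + op_b, the pattern compilers emit for addressing math. A standalone sketch on plain integers (illustrative only):

    #include <cstdint>

    // ISCADD without negation or .PO: shift the left operand, then add
    uint32_t Iscadd(uint32_t op_a, uint32_t op_b, uint32_t scale) {
        return (op_a << (scale & 31u)) + op_b; // scale is a 5-bit immediate
    }

For example, Iscadd(index, base, 3) computes base + index * 8, the address of element index in an array of 8-byte values.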
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class CompareOp : u64 { + F, // Always false + LT, // Less than + EQ, // Equal + LE, // Less than or equal + GT, // Greater than + NE, // Not equal + GE, // Greater than or equal + T, // Always true +}; + +enum class Bop : u64 { + AND, + OR, + XOR, +}; + +IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, + bool is_signed) { + switch (op) { + case CompareOp::F: + return ir.Imm1(false); + case CompareOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case CompareOp::EQ: + return ir.IEqual(lhs, rhs); + case CompareOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + case CompareOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case CompareOp::NE: + return ir.INotEqual(lhs, rhs); + case CompareOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case CompareOp::T: + return ir.Imm1(true); + } + throw NotImplementedException("Invalid ISETP compare op {}", op); +} + +IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { + switch (bop) { + case Bop::AND: + return ir.LogicalAnd(comparison, bop_pred); + case Bop::OR: + return ir.LogicalOr(comparison, bop_pred); + case Bop::XOR: + return ir.LogicalXor(comparison, bop_pred); + } + throw NotImplementedException("Invalid ISETP bop {}", bop); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, Bop> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const Bop bop{isetp.bop}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; + const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64) { + throw NotImplementedException("ISETP_imm"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..d4b417d14 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
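ISETP above produces two predicates from one comparison: the comparison combined with the source predicate, and its negation combined with the same predicate. A truth-level sketch with plain bools (illustrative only):

    // AND combine, mirroring Combine() with Bop::AND
    bool DestA(bool comparison, bool bop_pred) { return comparison && bop_pred; }
    bool DestB(bool comparison, bool bop_pred) { return !comparison && bop_pred; }

With bop_pred fixed to PT (true), DestA and DestB are exact complements, which lets a single compare feed both sides of a branch.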
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to clamp it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." + // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. + // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64) { + throw NotImplementedException("SHL_reg"); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + throw NotImplementedException("SHL_cbuf"); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..70a7c76c5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 
16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. + const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64) { + throw NotImplementedException("XMAD (reg)"); +} + +void TranslatorVisitor::XMAD_rc(u64) { + throw NotImplementedException("XMAD (rc)"); +} + +void TranslatorVisitor::XMAD_cr(u64) { + throw NotImplementedException("XMAD (cr)"); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<39, 8, IR::Reg> src_c; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))}; + const IR::U32 src_c{X(xmad.src_c)}; + XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, + xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cf..c9669c617 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -10,14 +10,33 @@ namespace Shader::Maxwell { namespace { -enum class StoreSize : u64 { - U8, - S8, - U16, - S16, +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend B32, B64, B128, + U128, // ???
+}; + +enum class StoreSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, +}; + +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html @@ -27,61 +46,137 @@ enum class StoreCache : u64 { CS, // Cache streaming, likely to be accessed once WT, // Cache write-through (to system memory) }; + +IR::U64 Address(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; + BitField<45, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.ConvertU(64, v.X(mem.addr_reg)); + } + if (!IR::IsAligned(mem.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} } // Anonymous namespace +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::B128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::U128: + throw NotImplementedException("LDG U.128"); + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + void TranslatorVisitor::STG(u64 insn) { // STG stores registers into global memory.
union { u64 raw; BitField<0, 8, IR::Reg> data_reg; - BitField<8, 8, IR::Reg> addr_reg; - BitField<45, 1, u64> e; BitField<46, 2, StoreCache> cache; BitField<48, 3, StoreSize> size; } const stg{insn}; - const IR::U64 address{[&]() -> IR::U64 { - if (stg.e == 0) { - // STG without .E uses a 32-bit pointer, zero-extend it - return ir.ConvertU(64, X(stg.addr_reg)); - } - if (!IR::IsAligned(stg.addr_reg, 2)) { - throw NotImplementedException("Unaligned address register"); - } - // Pack two registers to build the 32-bit address - return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); - }()}; - + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; switch (stg.size) { case StoreSize::U8: - ir.WriteGlobalU8(address, X(stg.data_reg)); + ir.WriteGlobalU8(address, X(data_reg)); break; case StoreSize::S8: - ir.WriteGlobalS8(address, X(stg.data_reg)); + ir.WriteGlobalS8(address, X(data_reg)); break; case StoreSize::U16: - ir.WriteGlobalU16(address, X(stg.data_reg)); + ir.WriteGlobalU16(address, X(data_reg)); break; case StoreSize::S16: - ir.WriteGlobalS16(address, X(stg.data_reg)); + ir.WriteGlobalS16(address, X(data_reg)); break; case StoreSize::B32: - ir.WriteGlobal32(address, X(stg.data_reg)); + ir.WriteGlobal32(address, X(data_reg)); break; case StoreSize::B64: { - if (!IR::IsAligned(stg.data_reg, 2)) { + if (!IR::IsAligned(data_reg, 2)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; ir.WriteGlobal64(address, vector); break; } case StoreSize::B128: - if (!IR::IsAligned(stg.data_reg, 4)) { + if (!IR::IsAligned(data_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), - X(stg.data_reg + 2), X(stg.data_reg + 3))}; + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; ir.WriteGlobal128(address, vector); break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp similarity index 96% rename from src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp rename to src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 7fa35ba3a..1711d3f48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) { void TranslatorVisitor::MOV_imm(u64 insn) { const MOV mov{insn}; CheckMask(mov); - X(mov.dest_reg, GetImm(insn)); + X(mov.dest_reg, GetImm20(insn)); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..93cea302a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
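Both LDG and STG now route address generation through the shared Address() helper. As a standalone sketch of the arithmetic it emits for the .E form, where an aligned register pair holds a 64-bit pointer and a signed 24-bit immediate is applied on top (names and values here are illustrative only):

    #include <cstdint>

    uint64_t GlobalAddress(uint32_t reg_lo, uint32_t reg_hi, int32_t offset24) {
        // PackUint2x32: the register pair forms the 64-bit base pointer
        const uint64_t base = (static_cast<uint64_t>(reg_hi) << 32) | reg_lo;
        // IAdd with an Imm64; offset24 is assumed already sign-extended from 24 bits
        return base + static_cast<uint64_t>(static_cast<int64_t>(offset24));
    }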
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 49, + SR_CirQueueIncrMinusOne = 50, + SR_NLATC = 51, + SR_SWINLO = 57, + SR_SWINSZ = 58, + SR_SMEMSZ = 59, + SR_SMEMBANKS = 60, + SR_LWINLO = 61, + SR_LWINSZ = 62, + SR_LMEMLOSZ = 63, + SR_LMEMHIOFF = 64, + SR_EQMASK = 65, + SR_LTMASK = 66, + SR_LEMASK = 67, + SR_GTMASK = 68, + SR_GEMASK = 69, + SR_REGALLOC = 70, + SR_GLOBALERRORSTATUS = 73, + SR_WARPERRORSTATUS = 75, + SR_PM_HI0 = 81, + SR_PM_HI1 = 82, + SR_PM_HI2 = 83, + SR_PM_HI3 = 84, + SR_PM_HI4 = 85, + SR_PM_HI5 = 86, + SR_PM_HI6 = 87, + SR_PM_HI7 = 88, + SR_CLOCKLO = 89, + SR_CLOCKHI = 90, + SR_GLOBALTIMERLO = 91, + SR_GLOBALTIMERHI = 92, + SR_HWTASKID = 105, + SR_CIRCULARQUEUEENTRYINDEX = 106, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d1..d70399f6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -7,21 +7,8 @@ #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#include "shader_recompiler/ir_opt/passes.h" - namespace Shader::Maxwell { -[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { - auto raw{IR::DumpBlock(block)}; - - Optimization::GetSetElimination(block); - Optimization::DeadCodeEliminationPass(block); - Optimization::IdentityRemovalPass(block); - auto dumped{IR::DumpBlock(block)}; - - fmt::print(stderr, "{}", dumped); -} - [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { throw 
NotImplementedException("Instruction {} is not implemented", opcode); } @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { ThrowNotImplemented(Opcode::DADD_imm); } -void TranslatorVisitor::DEPBAR(u64) { - ThrowNotImplemented(Opcode::DEPBAR); +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op } void TranslatorVisitor::DFMA_reg(u64) { @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { ThrowNotImplemented(Opcode::F2F_imm); } -void TranslatorVisitor::FADD_reg(u64) { - ThrowNotImplemented(Opcode::FADD_reg); -} - -void TranslatorVisitor::FADD_cbuf(u64) { - ThrowNotImplemented(Opcode::FADD_cbuf); -} - -void TranslatorVisitor::FADD_imm(u64) { - ThrowNotImplemented(Opcode::FADD_imm); -} - -void TranslatorVisitor::FADD32I(u64) { - ThrowNotImplemented(Opcode::FADD32I); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FFMA_reg(u64) { - ThrowNotImplemented(Opcode::FFMA_reg); -} - -void TranslatorVisitor::FFMA_rc(u64) { - ThrowNotImplemented(Opcode::FFMA_rc); -} - -void TranslatorVisitor::FFMA_cr(u64) { - ThrowNotImplemented(Opcode::FFMA_cr); -} - -void TranslatorVisitor::FFMA_imm(u64) { - ThrowNotImplemented(Opcode::FFMA_imm); -} - -void TranslatorVisitor::FFMA32I(u64) { - ThrowNotImplemented(Opcode::FFMA32I); -} - void TranslatorVisitor::FLO_reg(u64) { ThrowNotImplemented(Opcode::FLO_reg); } @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { ThrowNotImplemented(Opcode::FMNMX_imm); } -void TranslatorVisitor::FMUL_reg(u64) { - ThrowNotImplemented(Opcode::FMUL_reg); -} - -void TranslatorVisitor::FMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::FMUL_cbuf); -} - -void TranslatorVisitor::FMUL_imm(u64) { - ThrowNotImplemented(Opcode::FMUL_imm); -} - -void TranslatorVisitor::FMUL32I(u64) { - ThrowNotImplemented(Opcode::FMUL32I); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD_reg(u64) { - ThrowNotImplemented(Opcode::IADD_reg); -} - -void TranslatorVisitor::IADD_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD_cbuf); -} - -void TranslatorVisitor::IADD_imm(u64) { - ThrowNotImplemented(Opcode::IADD_imm); -} - void TranslatorVisitor::IADD3_reg(u64) { ThrowNotImplemented(Opcode::IADD3_reg); } @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::IADD32I(u64) { - ThrowNotImplemented(Opcode::IADD32I); -} - void TranslatorVisitor::ICMP_reg(u64) { ThrowNotImplemented(Opcode::ICMP_reg); } @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISCADD_reg(u64) { - ThrowNotImplemented(Opcode::ISCADD_reg); -} - -void TranslatorVisitor::ISCADD_cbuf(u64) { - ThrowNotImplemented(Opcode::ISCADD_cbuf); -} - -void TranslatorVisitor::ISCADD_imm(u64) { - ThrowNotImplemented(Opcode::ISCADD_imm); -} - -void TranslatorVisitor::ISCADD32I(u64) { - ThrowNotImplemented(Opcode::ISCADD32I); -} - void TranslatorVisitor::ISET_reg(u64) { ThrowNotImplemented(Opcode::ISET_reg); } @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { ThrowNotImplemented(Opcode::ISET_imm); } -void TranslatorVisitor::ISETP_reg(u64) { - ThrowNotImplemented(Opcode::ISETP_reg); -} - -void TranslatorVisitor::ISETP_cbuf(u64) { - 
ThrowNotImplemented(Opcode::ISETP_cbuf); -} - -void TranslatorVisitor::ISETP_imm(u64) { - ThrowNotImplemented(Opcode::ISETP_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { ThrowNotImplemented(Opcode::LDC); } -void TranslatorVisitor::LDG(u64) { - ThrowNotImplemented(Opcode::LDG); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -void TranslatorVisitor::S2R(u64) { - ThrowNotImplemented(Opcode::S2R); -} - void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHL_reg(u64) { - ThrowNotImplemented(Opcode::SHL_reg); -} - -void TranslatorVisitor::SHL_cbuf(u64) { - ThrowNotImplemented(Opcode::SHL_cbuf); -} - -void TranslatorVisitor::SHL_imm(u64) { - ThrowNotImplemented(Opcode::SHL_imm); -} - void TranslatorVisitor::SHR_reg(u64) { ThrowNotImplemented(Opcode::SHR_reg); } @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { ThrowNotImplemented(Opcode::VSHR); } -void TranslatorVisitor::XMAD_reg(u64) { - ThrowNotImplemented(Opcode::XMAD_reg); -} - -void TranslatorVisitor::XMAD_rc(u64) { - ThrowNotImplemented(Opcode::XMAD_rc); -} - -void TranslatorVisitor::XMAD_cr(u64) { - ThrowNotImplemented(Opcode::XMAD_cr); -} - -void TranslatorVisitor::XMAD_imm(u64) { - ThrowNotImplemented(Opcode::XMAD_imm); -} - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp deleted file mode 100644 index 21b8526cd..000000000 --- a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
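For context on the pass deleted below: within a single block it forwarded the value of the last SetRegister into later GetRegisters of the same register and erased the overwritten set; the SSA rewrite pass kept in passes.h presumably subsumes it. A minimal sketch of the forwarding idea on a flat map (illustrative only, not the recompiler's data structures):

    #include <array>
    #include <cstddef>
    #include <optional>

    std::array<std::optional<int>, 255> last_set{}; // value id last written per register

    void Set(std::size_t reg, int value_id) {
        last_set[reg] = value_id; // a later Set overwrites; the older one becomes dead
    }

    int Get(std::size_t reg, int loaded_value_id) {
        // Forward the known value when a Set was seen in this block, else keep the load
        return last_set[reg].value_or(loaded_value_id);
    }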
- -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Optimization { -namespace { -using Iterator = IR::Block::iterator; - -enum class TrackingType { - Reg, -}; - -struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - Iterator last_set_instruction; - bool set_instruction_present = false; -}; - -void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst, - TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; -} - -RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) { - RegisterInfo info{}; - info.register_value = IR::Value{&*get_inst}; - info.tracking_type = tracking_type; - return info; -} - -void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - if (info.register_value.IsEmpty()) { - info = Nothing(get_inst, tracking_type); - return; - } - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - info = Nothing(get_inst, tracking_type); -} -} // Anonymous namespace - -void GetSetElimination(IR::Block& block) { - std::array reg_info; - - for (Iterator inst = block.begin(); inst != block.end(); ++inst) { - switch (inst->Opcode()) { - case IR::Opcode::GetRegister: { - const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast(reg)}; - DoGet(reg_info.at(index), inst, TrackingType::Reg); - break; - } - case IR::Opcode::SetRegister: { - const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast(reg)}; - DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg); - break; - } - default: - break; - } - } -} - -} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 83f094d73..7ed4005ed 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) { } void DeadCodeEliminationPass(IR::Block& block); -void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index e3c9ad6e8..4022c6fe2 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -51,7 +51,8 @@ void RunDatabase() { int main() { // RunDatabase(); - FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; auto cfg{std::make_unique(env, 0)}; // fmt::print(stdout, "{}\n", cfg->Dot()); From e81739493a0cacc1efe3295f9d287d5d31b1a989 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 05:58:02 -0300 Subject: [PATCH 005/770] shader: Constant propagation and global memory to storage buffer --- src/shader_recompiler/CMakeLists.txt | 2 + .../frontend/ir/ir_emitter.cpp | 20 +- .../frontend/ir/ir_emitter.h | 5 +- 
.../frontend/ir/microinstruction.cpp | 26 +- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcode.inc | 22 +- src/shader_recompiler/frontend/ir/type.cpp | 2 +- src/shader_recompiler/frontend/ir/type.h | 1 - src/shader_recompiler/frontend/ir/value.cpp | 17 +- src/shader_recompiler/frontend/ir/value.h | 1 - .../frontend/maxwell/program.cpp | 6 +- .../ir_opt/constant_propagation_pass.cpp | 146 ++++++++ .../global_memory_to_storage_buffer_pass.cpp | 331 ++++++++++++++++++ .../ir_opt/identity_removal_pass.cpp | 28 +- src/shader_recompiler/ir_opt/passes.h | 6 +- .../ir_opt/ssa_rewrite_pass.cpp | 56 ++- .../ir_opt/verification_pass.cpp | 42 ++- 17 files changed, 652 insertions(+), 63 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/constant_propagation_pass.cpp create mode 100644 src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f5dd4d29e..72d5f41d2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -59,7 +59,9 @@ add_executable(shader_recompiler frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp + ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 87b253c9a..1c5ae0109 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -504,6 +504,20 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { } } +U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::ISub32, a, b); + case Type::U64: + return Inst(Opcode::ISub64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst(Opcode::IMul32, a, b); } @@ -679,8 +693,8 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& v } } -U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { - switch (bitsize) { +U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { + switch (result_bitsize) { case 32: switch (value.Type()) { case Type::U32: @@ -703,7 +717,7 @@ U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { break; } } - throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize); + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7ff763ecf..84b844898 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -17,6 +17,8 @@ namespace Shader::IR { class IREmitter { public: explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_, Block::iterator insertion_point_) + : block{block_}, insertion_point{insertion_point_} {} Block& block; @@ -125,6 +127,7 @@ public: [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& 
value); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32 INeg(const U32& value); [[nodiscard]] U32 IAbs(const U32& value); @@ -155,7 +158,7 @@ public: [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value); + [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ecf76e23d..de953838c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/type.h" @@ -44,6 +46,13 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteGlobal32: case Opcode::WriteGlobal64: case Opcode::WriteGlobal128: + case Opcode::WriteStorageU8: + case Opcode::WriteStorageS8: + case Opcode::WriteStorageU16: + case Opcode::WriteStorageS16: + case Opcode::WriteStorage32: + case Opcode::WriteStorage64: + case Opcode::WriteStorage128: return true; default: return false; @@ -56,15 +65,19 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: - case Opcode::GetZSCOFromOp: return true; default: return false; } } +bool Inst::AreAllArgsImmediates() const noexcept { + return std::all_of(args.begin(), args.begin() + NumArgs(), + [](const IR::Value& value) { return value.IsImmediate(); }); +} + bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst; + return zero_inst || sign_inst || carry_inst || overflow_inst; } Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { @@ -82,9 +95,6 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { case Opcode::GetOverflowFromOp: CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); return overflow_inst; - case Opcode::GetZSCOFromOp: - CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp); - return zsco_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -176,9 +186,6 @@ void Inst::Use(const Value& value) { case Opcode::GetOverflowFromOp: SetPseudoInstruction(value.Inst()->overflow_inst, this); break; - case Opcode::GetZSCOFromOp: - SetPseudoInstruction(value.Inst()->zsco_inst, this); - break; default: break; } @@ -200,9 +207,6 @@ void Inst::UndoUse(const Value& value) { case Opcode::GetOverflowFromOp: RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); break; - case Opcode::GetZSCOFromOp: - RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp); - break; default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 61849695a..22101c9e2 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -49,6 +49,9 @@ public: /// 
Pseudo-instructions depend on their parent instructions for their semantics. [[nodiscard]] bool IsPseudoInstruction() const noexcept; + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const noexcept; + /// Determines if there is a pseudo-operation associated with this instruction. [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction @@ -94,7 +97,6 @@ private: Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; - Inst* zsco_inst{}; std::vector> phi_operands; u64 flags{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 4ecb5e936..4596bf39f 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -24,9 +24,6 @@ OPCODE(GetAttribute, U32, Attr OPCODE(SetAttribute, U32, Attribute, ) OPCODE(GetAttributeIndexed, U32, U32, ) OPCODE(SetAttributeIndexed, U32, U32, ) -OPCODE(GetZSCORaw, U32, ) -OPCODE(SetZSCORaw, Void, U32, ) -OPCODE(SetZSCO, Void, ZSCO, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) @@ -65,6 +62,22 @@ OPCODE(WriteGlobal32, Void, U64, OPCODE(WriteGlobal64, Void, U64, Opaque, ) OPCODE(WriteGlobal128, Void, U64, Opaque, ) +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, Opaque, U32, U32, ) +OPCODE(LoadStorage128, Opaque, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, Opaque, ) +OPCODE(WriteStorage128, Void, U32, U32, Opaque, ) + // Vector utility OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) @@ -90,7 +103,6 @@ OPCODE(GetZeroFromOp, U1, Opaq OPCODE(GetSignFromOp, U1, Opaque, ) OPCODE(GetCarryFromOp, U1, Opaque, ) OPCODE(GetOverflowFromOp, U1, Opaque, ) -OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) // Floating-point operations OPCODE(FPAbs16, U16, U16, ) @@ -143,6 +155,8 @@ OPCODE(FPTrunc64, U64, U64, // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) OPCODE(IAbs32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp index da1e2a0f6..13cc09195 100644 --- a/src/shader_recompiler/frontend/ir/type.cpp +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -11,7 +11,7 @@ namespace Shader::IR { std::string NameOf(Type type) { static constexpr std::array names{ - "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO", + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", }; const size_t bits{static_cast(type)}; if (bits == 0) { diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index f753628e8..397875018 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -25,7 +25,6 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 
= 1 << 9, - ZSCO = 1 << 10, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e974e88c..59a9b10dc 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -91,26 +91,41 @@ IR::Attribute Value::Attribute() const { } bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } ValidateAccess(Type::U1); return imm_u1; } u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } ValidateAccess(Type::U8); return imm_u8; } u16 Value::U16() const { + if (IsIdentity()) { + return inst->Arg(0).U16(); + } ValidateAccess(Type::U16); return imm_u16; } u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } ValidateAccess(Type::U32); return imm_u32; } u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } ValidateAccess(Type::U64); return imm_u64; } @@ -142,8 +157,6 @@ bool Value::operator==(const Value& other) const { return imm_u32 == other.imm_u32; case Type::U64: return imm_u64 == other.imm_u64; - case Type::ZSCO: - throw NotImplementedException("ZSCO comparison"); } throw LogicError("Invalid type {}", type); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 368119921..31f831794 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -96,6 +96,5 @@ using U64 = TypedValue; using U32U64 = TypedValue; using U16U32U64 = TypedValue; using UAny = TypedValue; -using ZSCO = TypedValue; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index bd1f96c07..b3f2de852 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); + Optimization::Invoke(Optimization::ConstantPropagationPass, function); Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); - Optimization::Invoke(Optimization::IdentityRemovalPass, function); - // Optimization::Invoke(Optimization::VerificationPass, function); + Optimization::IdentityRemovalPass(function); + Optimization::VerificationPass(function); } //*/ } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..02f5b653d --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -0,0 +1,146 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
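The constant propagation pass that follows folds instructions whose operands are all immediates and, for commutative operations, normalizes the remaining immediate onto the right-hand side so later patterns only need to inspect one position. A standalone sketch of that strategy on plain integers (illustrative only):

    #include <cstdint>
    #include <optional>
    #include <utility>

    struct Operand {
        std::optional<uint32_t> imm; // engaged when the operand is a constant
    };

    // Returns the folded constant when both operands are immediates
    std::optional<uint32_t> FoldAddLike(Operand& lhs, Operand& rhs) {
        if (lhs.imm && rhs.imm) {
            return *lhs.imm + *rhs.imm; // the instruction becomes a constant
        }
        if (lhs.imm) {
            std::swap(lhs, rhs); // canonical form: immediate on the right
        }
        if (rhs.imm && *rhs.imm == 0) {
            // x + 0: the instruction can be replaced by its left operand
        }
        return std::nullopt;
    }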
+ +#include <algorithm> +#include <type_traits> + +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { + if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { + throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); +} + +template <typename T> +[[nodiscard]] T Arg(const IR::Value& value) { + if constexpr (std::is_same_v<T, bool>) { + return value.U1(); + } else if constexpr (std::is_same_v<T, u32>) { + return value.U32(); + } else if constexpr (std::is_same_v<T, u64>) { + return value.U64(); + } +} + +template <typename ImmFn> +bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { + const auto arg = [](const IR::Value& value) { + if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) { + return value.U1(); + } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) { + return value.U32(); + } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) { + return value.U64(); + } + }; + const IR::Value lhs{inst.Arg(0)}; + const IR::Value rhs{inst.Arg(1)}; + + const bool is_lhs_immediate{lhs.IsImmediate()}; + const bool is_rhs_immediate{rhs.IsImmediate()}; + + if (is_lhs_immediate && is_rhs_immediate) { + const auto result{imm_fn(arg(lhs), arg(rhs))}; + inst.ReplaceUsesWith(IR::Value{result}); + return false; + } + if (is_lhs_immediate && !is_rhs_immediate) { + IR::Inst* const rhs_inst{rhs.InstRecursive()}; + if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; + inst.SetArg(0, rhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* const lhs_inst{lhs.InstRecursive()}; + if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; + inst.SetArg(0, lhs_inst->Arg(0)); + inst.SetArg(1, IR::Value{combined}); + } + } + return true; +} + +void FoldGetRegister(IR::Inst& inst) { + if (inst.Arg(0).Reg() == IR::Reg::RZ) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + } +} + +void FoldGetPred(IR::Inst& inst) { + if (inst.Arg(0).Pred() == IR::Pred::PT) { + inst.ReplaceUsesWith(IR::Value{true}); + } +} + +template <typename T> +void FoldAdd(IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + return; + } + if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { + inst.ReplaceUsesWith(inst.Arg(0)); + } +} + +void FoldLogicalAnd(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(inst.Arg(0)); + } else { + inst.ReplaceUsesWith(IR::Value{false}); + } + } +} + +void ConstantPropagation(IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::GetRegister: + return FoldGetRegister(inst); + case IR::Opcode::GetPred: + return FoldGetPred(inst); + case IR::Opcode::IAdd32: + return FoldAdd<u32>(inst); + case IR::Opcode::IAdd64: + return FoldAdd<u64>(inst); + case IR::Opcode::BitFieldUExtract: + if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) { + inst.ReplaceUsesWith(IR::Value{ + BitFieldUExtract(inst.Arg(0).U32(),
inst.Arg(1).U32(), inst.Arg(2).U32())}); + } + break; + case IR::Opcode::LogicalAnd: + return FoldLogicalAnd(inst); + default: + break; + } +} +} // Anonymous namespace + +void ConstantPropagationPass(IR::Block& block) { + std::ranges::for_each(block, ConstantPropagation); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..ee69a5c9d --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -0,0 +1,331 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +/// Address in constant buffers to the storage buffer descriptor +struct StorageBufferAddr { + auto operator<=>(const StorageBufferAddr&) const noexcept = default; + + u32 index; + u32 offset; +}; + +/// Block iterator to a global memory instruction and the storage buffer it uses +struct StorageInst { + StorageBufferAddr storage_buffer; + IR::Block::iterator inst; +}; + +/// Bias towards a certain range of constant buffers when looking for storage buffers +struct Bias { + u32 index; + u32 offset_begin; + u32 offset_end; +}; + +using StorageBufferSet = + boost::container::flat_set, + boost::container::small_vector>; +using StorageInstVector = boost::container::small_vector; + +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemory(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + +/// Converts a global memory opcode to its storage buffer equivalent +IR::Opcode GlobalToStorage(IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::LoadGlobalS8: + return IR::Opcode::LoadStorageS8; + case IR::Opcode::LoadGlobalU8: + return IR::Opcode::LoadStorageU8; + case IR::Opcode::LoadGlobalS16: + return IR::Opcode::LoadStorageS16; + case IR::Opcode::LoadGlobalU16: + return IR::Opcode::LoadStorageU16; + case IR::Opcode::LoadGlobal32: + return IR::Opcode::LoadStorage32; + case IR::Opcode::LoadGlobal64: + return IR::Opcode::LoadStorage64; + case IR::Opcode::LoadGlobal128: + return IR::Opcode::LoadStorage128; + case IR::Opcode::WriteGlobalS8: + return IR::Opcode::WriteStorageS8; + case IR::Opcode::WriteGlobalU8: + return IR::Opcode::WriteStorageU8; + case IR::Opcode::WriteGlobalS16: + return IR::Opcode::WriteStorageS16; + case IR::Opcode::WriteGlobalU16: + return IR::Opcode::WriteStorageU16; + case IR::Opcode::WriteGlobal32: + return IR::Opcode::WriteStorage32; + case IR::Opcode::WriteGlobal64: + return IR::Opcode::WriteStorage64; + case IR::Opcode::WriteGlobal128: + return 
IR::Opcode::WriteStorage128; + default: + throw InvalidArgument("Invalid global memory opcode {}", opcode); + } +} + +/// Returns true when a storage buffer address satisfies a bias +bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { + return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end; +} + +/// Ignores a global memory operation, reads return zero and writes are ignored +void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { + const IR::Value zero{u32{0}}; + switch (inst->Opcode()) { + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobal32: + inst->ReplaceUsesWith(zero); + break; + case IR::Opcode::LoadGlobal64: + inst->ReplaceUsesWith( + IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); + break; + case IR::Opcode::LoadGlobal128: + inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( + inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); + break; + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + inst->Invalidate(); + break; + default: + throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); + } +} + +/// Recursively tries to track the storage buffer address used by a global memory instruction +std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a non-immediate index + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index = index.U32(), + .offset = offset.U32(), + }; + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly point to them + return std::nullopt; + } + return storage_buffer; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + if (const std::optional<StorageBufferAddr> storage_buffer{Track(inst->Arg(arg), bias)}) { + return *storage_buffer; + } + } + return std::nullopt; +} + +/// Collects the storage buffer used by a global memory instruction and the instruction itself +void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, + StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { + // NVN puts storage buffers in a specific range, we have to bias towards these addresses to + // avoid getting false positives + static constexpr Bias nvn_bias{ + .index{0}, + .offset_begin{0x110}, + .offset_end{0x610}, + }; + // First try to find storage buffers in the NVN address + const IR::U64 addr{inst->Arg(0)}; + std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)}; + if (!storage_buffer) { + // If it fails, track without a bias + storage_buffer = Track(addr, nullptr); + if (!storage_buffer) { + // If that also failed, drop the global memory usage +
+            IgnoreGlobalMemory(block, inst);
+            // Without this early return, the code below would dereference an empty optional
+            return;
+        }
+    }
+    // Collect storage buffer and the instruction
+    storage_buffer_set.insert(*storage_buffer);
+    to_replace.push_back(StorageInst{
+        .storage_buffer{*storage_buffer},
+        .inst{inst},
+    });
+}
+
+/// Tries to track the low 32 bits of the address used by a global memory instruction
+std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
+    // The first argument is the low level GPU pointer to the global memory instruction
+    const IR::U64 addr{inst->Arg(0)};
+    if (addr.IsImmediate()) {
+        // Not much we can do if it's an immediate
+        return std::nullopt;
+    }
+    // This address is expected to either be a PackUint2x32 or an IAdd64
+    IR::Inst* addr_inst{addr.InstRecursive()};
+    s32 imm_offset{0};
+    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+        // If it's an IAdd64, get the immediate offset it is applying and grab the address
+        // instruction. This expects the instruction to be canonicalized, with the address in
+        // the first argument and the immediate offset in the second one.
+        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+        if (!imm_offset_value.IsImmediate()) {
+            return std::nullopt;
+        }
+        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+        const IR::U64 iadd_addr{addr_inst->Arg(0)};
+        if (iadd_addr.IsImmediate()) {
+            return std::nullopt;
+        }
+        addr_inst = iadd_addr.Inst();
+    }
+    // With IAdd64 handled, now PackUint2x32 is expected without exceptions
+    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+        return std::nullopt;
+    }
+    // PackUint2x32 is expected to be generated from a vector
+    const IR::Value vector{addr_inst->Arg(0)};
+    if (vector.IsImmediate()) {
+        return std::nullopt;
+    }
+    // This vector is expected to be a CompositeConstruct2
+    IR::Inst* const vector_inst{vector.InstRecursive()};
+    if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+        return std::nullopt;
+    }
+    // Grab the first argument from the CompositeConstruct2; this is the low address.
+    // Re-apply the offset in case we found one.
+    const IR::U32 low_addr{vector_inst->Arg(0)};
+    return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
+}
+
+/// Returns the byte offset for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
+    IR::IREmitter ir{block, inst};
+    IR::U32 offset;
+    if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
+        offset = *low_addr;
+    } else {
+        offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+    }
+    // Subtract the least significant 32 bits from the guest offset. The result is the storage
+    // buffer offset in bytes.
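+    // Worked example (illustrative values, not from the original patch): if cbuf0[0x110]
+    // holds a storage buffer whose base address is 0x00cd0000 and the shader accesses
+    // base+0x40, then `offset` tracks to 0x00cd0040 while `low_cbuf` reads back 0x00cd0000,
+    // so the subtraction below leaves the byte offset 0x40 for the storage instruction.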
+    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                 const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
+    inst->ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                  const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
+    inst->Invalidate();
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+             const IR::U32& offset) {
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+        return ReplaceLoad(block, inst, storage_index, offset);
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return ReplaceWrite(block, inst, storage_index, offset);
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
+    }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+    StorageBufferSet storage_buffers;
+    StorageInstVector to_replace;
+
+    for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
+        if (!IsGlobalMemory(*inst)) {
+            continue;
+        }
+        CollectStorageBuffers(block, inst, storage_buffers, to_replace);
+    }
+    for (const auto [storage_buffer, inst] : to_replace) {
+        const auto it{storage_buffers.find(storage_buffer)};
+        const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
+        Replace(block, inst, storage_index, offset);
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f8500087..39a972919 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
 
 namespace Shader::Optimization {
 
-void IdentityRemovalPass(IR::Block& block) {
+void IdentityRemovalPass(IR::Function& function) {
     std::vector<IR::Inst*> to_invalidate;
 
-    for (auto inst = block.begin(); inst != block.end();) {
-        const size_t num_args{inst->NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            IR::Value arg;
-            while ((arg = inst->Arg(i)).IsIdentity()) {
-                inst->SetArg(i, arg.Inst()->Arg(0));
+    for (auto& block : function.blocks) {
+        for (auto inst = block->begin(); inst != block->end();) {
+            const size_t num_args{inst->NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::Value arg;
+                while ((arg = inst->Arg(i)).IsIdentity()) {
+                    inst->SetArg(i, arg.Inst()->Arg(0));
+                }
+            }
+            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+                to_invalidate.push_back(&*inst);
+                inst = block->Instructions().erase(inst);
+            } else {
+                ++inst;
             }
-        }
-        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
-            to_invalidate.push_back(&*inst);
-            inst = block.Instructions().erase(inst);
-        } else {
-            ++inst;
         }
     }
     for (IR::Inst* const inst : to_invalidate) {
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed..578a24d89 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
     }
 }
 
+void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
-void IdentityRemovalPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void IdentityRemovalPass(IR::Function& function);
 void SsaRewritePass(IR::Function& function);
-void VerificationPass(const IR::Block& block);
+void VerificationPass(const IR::Function& function);
 
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40..3c9b020e0 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
 // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
 //
 
-#include <map>
-
 #include <boost/container/flat_map.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
 namespace {
 using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
 
+struct FlagTag {};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
 struct DefTable {
     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
         return regs[IR::RegIndex(variable)];
     }
@@ -39,8 +43,28 @@
         return preds[IR::PredIndex(variable)];
     }
 
+    [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
+        return zero_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
+        return sign_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
+        return carry_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
+        return overflow_flag;
+    }
+
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+    ValueMap zero_flag;
+    ValueMap sign_flag;
+    ValueMap carry_flag;
+    ValueMap overflow_flag;
 };
 
 IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept {
     return IR::Opcode::Undef1;
 }
 
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+    return IR::Opcode::Undef1;
+}
+
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
     return inst.Opcode() == IR::Opcode::Phi;
 }
@@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) {
                 pass.WriteVariable(pred, block.get(), inst.Arg(1));
             }
             break;
+        case IR::Opcode::SetZFlag:
+            pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
+            break;
+        case IR::Opcode::SetSFlag:
+            pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
+            break;
+        case IR::Opcode::SetCFlag:
+            pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
+            break;
+        case IR::Opcode::SetOFlag:
+            pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
+            break;
         case IR::Opcode::GetRegister:
             if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
                 inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) {
                 inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
             }
             break;
+        case IR::Opcode::GetZFlag:
+            inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+            break;
+        case IR::Opcode::GetSFlag:
+            inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+            break;
+        case IR::Opcode::GetCFlag:
+            inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+            break;
+        case IR::Opcode::GetOFlag:
+            inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+            break;
         default:
             break;
         }
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b..8a5adf5a2 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
 
 namespace Shader::Optimization {
 
-static void ValidateTypes(const IR::Block& block) {
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Type t1{inst.Arg(i).Type()};
-            const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
-            if (!IR::AreTypesCompatible(t1, t2)) {
-                throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+static void ValidateTypes(const IR::Function& function) {
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Type t1{inst.Arg(i).Type()};
+                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                if (!IR::AreTypesCompatible(t1, t2)) {
+                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+                }
             }
         }
     }
 }
 
-static void ValidateUses(const IR::Block& block) {
+static void ValidateUses(const IR::Function& function) {
     std::map<IR::Inst*, size_t> actual_uses;
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Value arg{inst.Arg(i)};
-            if (!arg.IsImmediate()) {
-                ++actual_uses[arg.Inst()];
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Value arg{inst.Arg(i)};
+                if (!arg.IsImmediate()) {
+                    ++actual_uses[arg.Inst()];
+                }
             }
         }
     }
     for (const auto [inst, uses] : actual_uses) {
         if (inst->UseCount() != uses) {
-            throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+            throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
         }
     }
 }
 
-void VerificationPass(const IR::Block& block) {
-    ValidateTypes(block);
-    ValidateUses(block);
+void VerificationPass(const IR::Function& function) {
+    ValidateTypes(function);
+    ValidateUses(function);
 }
 
 } // namespace Shader::Optimization

From dc04a50ac2aa0bc71db701d0eea857765c2581f0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 9 Jul 2021 17:11:47 -0300
Subject: [PATCH 006/770] shader: Remove illegal character in SSA pass

---
 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index 3c9b020e0..a62d3f56b 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -5,7 +5,7 @@
 // This file implements the SSA rewriting algorithm proposed in
 //
 //      Simple and Efficient Construction of Static Single Assignment Form.
-//      Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
+//      Braun M., Buchwald S., Hack S., Leissa R., Mallon C., Zwinkau A. (2013)
 //      In: Jhala R., De Bosschere K. (eds)
 //      Compiler Construction. CC 2013.
 //      Lecture Notes in Computer Science, vol 7791.

From be94ee88d227d0d3dbeabe9ade98bacd910c7a7e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 5 Feb 2021 19:19:36 -0300
Subject: [PATCH 007/770] shader: Make typed IR

---
 .../frontend/ir/ir_emitter.cpp                | 275 ++++++++++++------
 .../frontend/ir/ir_emitter.h                  |  67 +++--
 src/shader_recompiler/frontend/ir/opcode.inc  | 200 +++++++------
 src/shader_recompiler/frontend/ir/type.cpp    |   4 +-
 src/shader_recompiler/frontend/ir/type.h      |  15 +
 src/shader_recompiler/frontend/ir/value.cpp   |  28 ++
 src/shader_recompiler/frontend/ir/value.h     |  10 +
 .../translate/impl/floating_point_add.cpp     |  12 +-
 .../floating_point_conversion_integer.cpp     |  20 +-
 .../floating_point_fused_multiply_add.cpp     |  16 +-
 .../impl/floating_point_multi_function.cpp    |   6 +-
 .../impl/floating_point_multiply.cpp          |  13 +-
 .../frontend/maxwell/translate/impl/impl.cpp  |  20 ++
 .../frontend/maxwell/translate/impl/impl.h    |   6 +
 .../translate/impl/load_store_attribute.cpp   |  23 +-
 .../translate/impl/load_store_memory.cpp      |   4 +-
 .../ir_opt/constant_propagation_pass.cpp      |  20 ++
 .../global_memory_to_storage_buffer_pass.cpp  |  21 +-
 src/shader_recompiler/main.cpp                |   2 +-
 19 files changed, 494 insertions(+), 268 deletions(-)

diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 1c5ae0109..9d7dc034c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -32,16 +32,16 @@ U32 IREmitter::Imm32(s32 value) const {
     return U32{Value{static_cast<u32>(value)}};
 }
 
-U32 IREmitter::Imm32(f32 value) const {
-    return U32{Value{Common::BitCast<u32>(value)}};
+F32 IREmitter::Imm32(f32 value) const {
+    return F32{Value{value}};
 }
 
 U64 IREmitter::Imm64(u64 value) const {
     return U64{Value{value}};
 }
 
-U64 IREmitter::Imm64(f64 value) const {
-    return U64{Value{Common::BitCast<u64>(value)}};
+F64 IREmitter::Imm64(f64 value) const {
+    return F64{Value{value}};
 }
 
 void IREmitter::Branch(IR::Block* label) {
@@ -121,11 +121,11 @@ void IREmitter::SetOFlag(const U1& value) {
     Inst(Opcode::SetOFlag, value);
 }
 
-U32 IREmitter::GetAttribute(IR::Attribute attribute) {
-    return Inst(Opcode::GetAttribute, attribute);
+F32 IREmitter::GetAttribute(IR::Attribute attribute) {
+    return Inst(Opcode::GetAttribute, attribute);
 }
 
-void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
+void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
     Inst(Opcode::SetAttribute, attribute, value);
 }
@@ -225,50 +225,113 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
     return Inst(Opcode::GetOverflowFromOp, op);
 }
 
-U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
+F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
     if (a.Type() != a.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
-    case Type::U16:
-        return Inst(Opcode::FPAdd16, Flags{control}, a, b);
-    case Type::U32:
-        return Inst(Opcode::FPAdd32, Flags{control}, a, b);
-    case Type::U64:
-        return Inst(Opcode::FPAdd64, Flags{control}, a, b);
+    case Type::F16:
+        return Inst(Opcode::FPAdd16, Flags{control}, a, b);
+    case Type::F32:
+        return Inst(Opcode::FPAdd32, Flags{control}, a, b);
+    case Type::F64:
+        return
Inst(Opcode::FPAdd64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } - return Inst(Opcode::CompositeConstruct2, e1, e2); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x2, e1, e2); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x2, e1, e2); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x2, e1, e2); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x2, e1, e2); + default: + ThrowInvalidType(e1.Type()); + } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); } - return Inst(Opcode::CompositeConstruct3, e1, e2, e3); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); + default: + ThrowInvalidType(e1.Type()); + } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, - const UAny& e4) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } - return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + default: + ThrowInvalidType(e1.Type()); + } } -UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { - if (element >= 4) { - throw InvalidArgument("Out of bounds element {}", element); +Value IREmitter::CompositeExtract(const Value& vector, size_t element) { + const auto read = [&](Opcode opcode, size_t limit) -> Value { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, Value{static_cast(element)}); + }; + switch (vector.Type()) { + case Type::U32x2: + return read(Opcode::CompositeExtractU32x2, 2); + case Type::U32x3: + return read(Opcode::CompositeExtractU32x3, 3); + case Type::U32x4: + return read(Opcode::CompositeExtractU32x4, 4); + case Type::F16x2: + return read(Opcode::CompositeExtractF16x2, 2); + case Type::F16x3: + return read(Opcode::CompositeExtractF16x3, 3); + case Type::F16x4: + return read(Opcode::CompositeExtractF16x4, 4); + case Type::F32x2: + return read(Opcode::CompositeExtractF32x2, 2); + case Type::F32x3: + return read(Opcode::CompositeExtractF32x3, 3); + case Type::F32x4: + return read(Opcode::CompositeExtractF32x4, 4); + case Type::F64x2: + return read(Opcode::CompositeExtractF64x2, 
2); + case Type::F64x3: + return read(Opcode::CompositeExtractF64x3, 3); + case Type::F64x4: + return read(Opcode::CompositeExtractF64x4, 4); + default: + ThrowInvalidType(vector.Type()); } - return Inst(Opcode::CompositeExtract, vector, Imm32(static_cast(element))); } UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { @@ -289,6 +352,36 @@ UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& } } +template <> +IR::U32 IREmitter::BitCast(const IR::F32& value) { + return Inst(Opcode::BitCastU32F32, value); +} + +template <> +IR::F32 IREmitter::BitCast(const IR::U32& value) { + return Inst(Opcode::BitCastF32U32, value); +} + +template <> +IR::U16 IREmitter::BitCast(const IR::F16& value) { + return Inst(Opcode::BitCastU16F16, value); +} + +template <> +IR::F16 IREmitter::BitCast(const IR::U16& value) { + return Inst(Opcode::BitCastF16U16, value); +} + +template <> +IR::U64 IREmitter::BitCast(const IR::F64& value) { + return Inst(Opcode::BitCastU64F64, value); +} + +template <> +IR::F64 IREmitter::BitCast(const IR::U64& value) { + return Inst(Opcode::BitCastF64U64, value); +} + U64 IREmitter::PackUint2x32(const Value& vector) { return Inst(Opcode::PackUint2x32, vector); } @@ -305,75 +398,75 @@ Value IREmitter::UnpackFloat2x16(const U32& value) { return Inst(Opcode::UnpackFloat2x16, value); } -U64 IREmitter::PackDouble2x32(const Value& vector) { - return Inst(Opcode::PackDouble2x32, vector); +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst(Opcode::PackDouble2x32, vector); } -Value IREmitter::UnpackDouble2x32(const U64& value) { +Value IREmitter::UnpackDouble2x32(const F64& value) { return Inst(Opcode::UnpackDouble2x32, value); } -U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { +F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { - case Type::U16: - return Inst(Opcode::FPMul16, Flags{control}, a, b); - case Type::U32: - return Inst(Opcode::FPMul32, Flags{control}, a, b); - case Type::U64: - return Inst(Opcode::FPMul64, Flags{control}, a, b); + case Type::F16: + return Inst(Opcode::FPMul16, Flags{control}, a, b); + case Type::F32: + return Inst(Opcode::FPMul32, Flags{control}, a, b); + case Type::F64: + return Inst(Opcode::FPMul64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } } -U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, +F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, FpControl control) { if (a.Type() != b.Type() || a.Type() != c.Type()) { throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); } switch (a.Type()) { - case Type::U16: - return Inst(Opcode::FPFma16, Flags{control}, a, b, c); - case Type::U32: - return Inst(Opcode::FPFma32, Flags{control}, a, b, c); - case Type::U64: - return Inst(Opcode::FPFma64, Flags{control}, a, b, c); + case Type::F16: + return Inst(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::F32: + return Inst(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::F64: + return Inst(Opcode::FPFma64, Flags{control}, a, b, c); default: ThrowInvalidType(a.Type()); } } -U16U32U64 IREmitter::FPAbs(const U16U32U64& value) { +F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPAbs16, value); + 
return Inst(Opcode::FPAbs16, value); case Type::U32: - return Inst(Opcode::FPAbs32, value); + return Inst(Opcode::FPAbs32, value); case Type::U64: - return Inst(Opcode::FPAbs64, value); + return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPNeg(const U16U32U64& value) { +F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPNeg16, value); + return Inst(Opcode::FPNeg16, value); case Type::U32: - return Inst(Opcode::FPNeg32, value); + return Inst(Opcode::FPNeg32, value); case Type::U64: - return Inst(Opcode::FPNeg64, value); + return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { - U16U32U64 result{value}; +F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { + F16F32F64 result{value}; if (abs) { result = FPAbs(value); } @@ -383,108 +476,108 @@ U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { return result; } -U32 IREmitter::FPCosNotReduced(const U32& value) { - return Inst(Opcode::FPCosNotReduced, value); +F32 IREmitter::FPCosNotReduced(const F32& value) { + return Inst(Opcode::FPCosNotReduced, value); } -U32 IREmitter::FPExp2NotReduced(const U32& value) { - return Inst(Opcode::FPExp2NotReduced, value); +F32 IREmitter::FPExp2NotReduced(const F32& value) { + return Inst(Opcode::FPExp2NotReduced, value); } -U32 IREmitter::FPLog2(const U32& value) { - return Inst(Opcode::FPLog2, value); +F32 IREmitter::FPLog2(const F32& value) { + return Inst(Opcode::FPLog2, value); } -U32U64 IREmitter::FPRecip(const U32U64& value) { +F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { case Type::U32: - return Inst(Opcode::FPRecip32, value); + return Inst(Opcode::FPRecip32, value); case Type::U64: - return Inst(Opcode::FPRecip64, value); + return Inst(Opcode::FPRecip64, value); default: ThrowInvalidType(value.Type()); } } -U32U64 IREmitter::FPRecipSqrt(const U32U64& value) { +F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { switch (value.Type()) { case Type::U32: - return Inst(Opcode::FPRecipSqrt32, value); + return Inst(Opcode::FPRecipSqrt32, value); case Type::U64: - return Inst(Opcode::FPRecipSqrt64, value); + return Inst(Opcode::FPRecipSqrt64, value); default: ThrowInvalidType(value.Type()); } } -U32 IREmitter::FPSinNotReduced(const U32& value) { - return Inst(Opcode::FPSinNotReduced, value); +F32 IREmitter::FPSinNotReduced(const F32& value) { + return Inst(Opcode::FPSinNotReduced, value); } -U32 IREmitter::FPSqrt(const U32& value) { - return Inst(Opcode::FPSqrt, value); +F32 IREmitter::FPSqrt(const F32& value) { + return Inst(Opcode::FPSqrt, value); } -U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) { +F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPSaturate16, value); + return Inst(Opcode::FPSaturate16, value); case Type::U32: - return Inst(Opcode::FPSaturate32, value); + return Inst(Opcode::FPSaturate32, value); case Type::U64: - return Inst(Opcode::FPSaturate64, value); + return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, value); case Type::U32: - return 
Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, value); case Type::U64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPFloor(const U16U32U64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, value); case Type::U32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, value); case Type::U64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPCeil(const U16U32U64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, value); case Type::U32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, value); case Type::U64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, value); case Type::U32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, value); case Type::U64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); } @@ -605,7 +698,7 @@ U1 IREmitter::LogicalNot(const U1& value) { return Inst(Opcode::LogicalNot, value); } -U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { @@ -645,7 +738,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { } } -U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { @@ -685,7 +778,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { } } -U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { if (is_signed) { return ConvertFToS(bitsize, value); } else { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 84b844898..bfd9916cc 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -27,9 +27,9 @@ public: [[nodiscard]] U16 Imm16(u16 value) const; [[nodiscard]] U32 Imm32(u32 value) const; [[nodiscard]] U32 Imm32(s32 value) const; - [[nodiscard]] U32 Imm32(f32 value) const; + [[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] U64 Imm64(u64 value) const; - [[nodiscard]] U64 Imm64(f64 value) const; + [[nodiscard]] F64 Imm64(f64 value) const; void Branch(IR::Block* label); void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); @@ -55,8 +55,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); - [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); - void SetAttribute(IR::Attribute attribute, const U32& value); + [[nodiscard]] F32 
GetAttribute(IR::Attribute attribute); + void SetAttribute(IR::Attribute attribute, const F32& value); [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); @@ -87,44 +87,47 @@ public: [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, - const UAny& e4); - [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + template + [[nodiscard]] Dest BitCast(const Source& value); + [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); [[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] Value UnpackFloat2x16(const U32& value); - [[nodiscard]] U64 PackDouble2x32(const Value& vector); - [[nodiscard]] Value UnpackDouble2x32(const U64& value); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); - [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); - [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); - [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, FpControl control = {}); - [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg); + [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); - [[nodiscard]] U32 FPCosNotReduced(const U32& value); - [[nodiscard]] U32 FPExp2NotReduced(const U32& value); - [[nodiscard]] U32 FPLog2(const U32& value); - [[nodiscard]] U32U64 FPRecip(const U32U64& value); - [[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value); - [[nodiscard]] U32 FPSinNotReduced(const U32& value); - [[nodiscard]] U32 FPSqrt(const U32& value); - [[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + [[nodiscard]] F32 FPCosNotReduced(const F32& value); + [[nodiscard]] F32 FPExp2NotReduced(const F32& value); + [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32F64 FPRecip(const F32F64& value); + [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); + [[nodiscard]] 
F32 FPSinNotReduced(const F32& value); + [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); @@ -154,9 +157,9 @@ public: [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalNot(const U1& value); - [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 4596bf39f..6eb105d92 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -52,15 +52,15 @@ OPCODE(LoadGlobalS8, U32, U64, OPCODE(LoadGlobalU16, U32, U64, ) OPCODE(LoadGlobalS16, U32, U64, ) OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, Opaque, U64, ) -OPCODE(LoadGlobal128, Opaque, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, ) OPCODE(WriteGlobalS16, Void, U64, U32, ) OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, Opaque, ) -OPCODE(WriteGlobal128, Void, U64, Opaque, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) @@ -68,21 +68,41 @@ OPCODE(LoadStorageS8, U32, U32, OPCODE(LoadStorageU16, U32, U32, U32, ) OPCODE(LoadStorageS16, U32, U32, U32, ) OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, Opaque, U32, U32, ) -OPCODE(LoadStorage128, Opaque, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, Opaque, ) -OPCODE(WriteStorage128, Void, U32, U32, Opaque, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) -OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(CompositeExtract, Opaque, Opaque, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, 
U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) // Select operations OPCODE(Select8, U8, U1, U8, U8, ) @@ -91,12 +111,18 @@ OPCODE(Select32, U32, U1, OPCODE(Select64, U64, U1, U64, U64, ) // Bitwise conversions -OPCODE(PackUint2x32, U64, Opaque, ) -OPCODE(UnpackUint2x32, Opaque, U64, ) -OPCODE(PackFloat2x16, U32, Opaque, ) -OPCODE(UnpackFloat2x16, Opaque, U32, ) -OPCODE(PackDouble2x32, U64, Opaque, ) -OPCODE(UnpackDouble2x32, Opaque, U64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackDouble2x32, U64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, U64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) @@ -105,52 +131,52 @@ OPCODE(GetCarryFromOp, U1, Opaq OPCODE(GetOverflowFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, U16, U16, ) -OPCODE(FPAbs32, U32, U32, ) -OPCODE(FPAbs64, U64, U64, ) -OPCODE(FPAdd16, U16, U16, U16, ) -OPCODE(FPAdd32, U32, U32, U32, ) -OPCODE(FPAdd64, U64, U64, U64, ) -OPCODE(FPFma16, U16, U16, U16, U16, ) -OPCODE(FPFma32, U32, U32, U32, U32, ) -OPCODE(FPFma64, U64, U64, U64, U64, ) -OPCODE(FPMax32, U32, U32, U32, ) -OPCODE(FPMax64, U64, U64, U64, ) -OPCODE(FPMin32, U32, U32, U32, ) -OPCODE(FPMin64, U64, U64, U64, ) -OPCODE(FPMul16, U16, U16, U16, ) -OPCODE(FPMul32, U32, U32, U32, ) -OPCODE(FPMul64, U64, U64, U64, ) -OPCODE(FPNeg16, U16, U16, ) -OPCODE(FPNeg32, U32, U32, ) -OPCODE(FPNeg64, U64, U64, ) -OPCODE(FPRecip32, U32, U32, ) -OPCODE(FPRecip64, U64, U64, ) -OPCODE(FPRecipSqrt32, U32, U32, ) -OPCODE(FPRecipSqrt64, U64, U64, ) -OPCODE(FPSqrt, U32, U32, ) -OPCODE(FPSin, U32, U32, ) -OPCODE(FPSinNotReduced, U32, U32, ) -OPCODE(FPExp2, U32, U32, ) -OPCODE(FPExp2NotReduced, U32, U32, ) -OPCODE(FPCos, U32, U32, ) -OPCODE(FPCosNotReduced, U32, U32, ) -OPCODE(FPLog2, U32, U32, ) -OPCODE(FPSaturate16, U16, U16, ) -OPCODE(FPSaturate32, U32, U32, ) -OPCODE(FPSaturate64, U64, U64, ) -OPCODE(FPRoundEven16, U16, U16, ) -OPCODE(FPRoundEven32, U32, U32, ) -OPCODE(FPRoundEven64, U64, U64, ) -OPCODE(FPFloor16, U16, U16, ) -OPCODE(FPFloor32, U32, U32, ) -OPCODE(FPFloor64, U64, U64, ) 
-OPCODE(FPCeil16, U16, U16, ) -OPCODE(FPCeil32, U32, U32, ) -OPCODE(FPCeil64, U64, U64, ) -OPCODE(FPTrunc16, U16, U16, ) -OPCODE(FPTrunc32, U32, U32, ) -OPCODE(FPTrunc64, U64, U64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPSinNotReduced, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPExp2NotReduced, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPCosNotReduced, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) @@ -188,24 +214,24 @@ OPCODE(LogicalXor, U1, U1, OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, U16, ) -OPCODE(ConvertS16F32, U32, U32, ) -OPCODE(ConvertS16F64, U32, U64, ) -OPCODE(ConvertS32F16, U32, U16, ) -OPCODE(ConvertS32F32, U32, U32, ) -OPCODE(ConvertS32F64, U32, U64, ) -OPCODE(ConvertS64F16, U64, U16, ) -OPCODE(ConvertS64F32, U64, U32, ) -OPCODE(ConvertS64F64, U64, U64, ) -OPCODE(ConvertU16F16, U32, U16, ) -OPCODE(ConvertU16F32, U32, U32, ) -OPCODE(ConvertU16F64, U32, U64, ) -OPCODE(ConvertU32F16, U32, U16, ) -OPCODE(ConvertU32F32, U32, U32, ) -OPCODE(ConvertU32F64, U32, U64, ) -OPCODE(ConvertU64F16, U64, U16, ) -OPCODE(ConvertU64F32, U64, U32, ) -OPCODE(ConvertU64F64, U64, U64, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp index 13cc09195..f28341bfe 100644 --- a/src/shader_recompiler/frontend/ir/type.cpp +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -11,7 +11,9 @@ namespace Shader::IR { std::string NameOf(Type type) { static constexpr std::array names{ - "Opaque", "Label", "Reg", "Pred", "Attribute", 
"U1", "U8", "U16", "U32", "U64", + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", + "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", + "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", }; const size_t bits{static_cast(type)}; if (bits == 0) { diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 397875018..9a32ca1e8 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -25,6 +25,21 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 59a9b10dc..93ff8ccf1 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -26,8 +26,12 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} +Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} + Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} +Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} + bool Value::IsIdentity() const noexcept { return type == Type::Opaque && inst->Opcode() == Opcode::Identity; } @@ -122,6 +126,14 @@ u32 Value::U32() const { return imm_u32; } +f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + ValidateAccess(Type::F32); + return imm_f32; +} + u64 Value::U64() const { if (IsIdentity()) { return inst->Arg(0).U64(); @@ -152,11 +164,27 @@ bool Value::operator==(const Value& other) const { case Type::U8: return imm_u8 == other.imm_u8; case Type::U16: + case Type::F16: return imm_u16 == other.imm_u16; case Type::U32: + case Type::F32: return imm_u32 == other.imm_u32; case Type::U64: + case Type::F64: return imm_u64 == other.imm_u64; + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + case Type::F16x2: + case Type::F16x3: + case Type::F16x4: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + break; } throw LogicError("Invalid type {}", type); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 31f831794..2f3688c73 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -28,7 +28,9 @@ public: explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; explicit Value(u32 value) noexcept; + explicit Value(f32 value) noexcept; explicit Value(u64 value) noexcept; + explicit Value(f64 value) noexcept; [[nodiscard]] bool IsIdentity() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; [[nodiscard]] u32 U32() const; + [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; [[nodiscard]] bool operator==(const Value& other) const; @@ -65,7 +68,9 @@ private: u8 imm_u8; u16 imm_u16; u32 imm_u32; + f32 imm_f32; u64 imm_u64; + f64 imm_f64; }; }; @@ -93,8 +98,13 @@ using U8 = TypedValue; using U16 = TypedValue; using U32 = TypedValue; using U64 = 
TypedValue; +using F16 = TypedValue; +using F32 = TypedValue; +using F64 = TypedValue; using U32U64 = TypedValue; +using F32F64 = TypedValue; using U16U32U64 = TypedValue; +using F16F32F64 = TypedValue; using UAny = TypedValue; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index d2c44b9cc..cb3a326cf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -11,7 +11,7 @@ namespace Shader::Maxwell { namespace { void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, - const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; @@ -24,17 +24,17 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin if (cc) { throw NotImplementedException("FADD CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } -void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20(insn)); + FADD(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FADD_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index c4288d9a8..acd8445ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -55,21 +55,21 @@ size_t BitSize(DestFormat dest_format) { } } -void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) { +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; - const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)}; - const IR::U16U32U64 rounded_value{[&] { + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(float_value); + return v.ir.FPRoundEven(op_a); case Rounding::Floor: - return v.ir.FPFloor(float_value); + return v.ir.FPFloor(op_a); case Rounding::Ceil: - return v.ir.FPCeil(float_value); + return v.ir.FPCeil(op_a); case Rounding::Trunc: - return v.ir.FPTrunc(float_value); + return v.ir.FPTrunc(op_a); default: throw NotImplementedException("Invalid F2I rounding {}", 
f2i.rounding.Value()); } @@ -105,12 +105,12 @@ void TranslatorVisitor::F2I_reg(u64 insn) { BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; - const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 { + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { switch (f2i.base.src_format) { case SrcFormat::F16: - return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half); + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; case SrcFormat::F32: - return X(f2i.src_reg); + return F(f2i.src_reg); case SrcFormat::F64: return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); default: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 30ca052ec..1464f2807 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -9,7 +9,7 @@ namespace Shader::Maxwell { namespace { -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { union { u64 raw; @@ -23,18 +23,18 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s if (cc) { throw NotImplementedException("FFMA CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; - const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); } -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { union { u64 raw; BitField<47, 1, u64> cc; @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); + FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); + FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index e2ab0dab2..90cddb18b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -35,8 +35,8 @@ void TranslatorVisitor::MUFU(u64 insn) { BitField<50, 1, u64> sat; } const mufu{insn}; - const IR::U32 
op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; - IR::U32 value{[&]() -> IR::U32 { + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: return ir.FPCosNotReduced(op_a); @@ -65,7 +65,7 @@ void TranslatorVisitor::MUFU(u64 insn) { value = ir.FPSaturate(value); } - X(mufu.dest_reg, value); + F(mufu.dest_reg, value); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 743a1e2f0..1b1d38be7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -4,6 +4,7 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" @@ -43,7 +44,7 @@ float ScaleFactor(Scale scale) { throw NotImplementedException("Invalid FMUL scale {}", scale); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { union { u64 raw; @@ -57,23 +58,23 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode if (sat) { throw NotImplementedException("FMUL SAT"); } - IR::U32 op_a{v.X(fmul.src_a)}; + IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); } op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); } - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -90,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20(insn)); + return FMUL(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 548c7f611..3c9eaddd9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -12,10 +12,18 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast(X(reg)); +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast(value)); +} + 
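+// Note: Maxwell keeps floating-point values in the same 32-bit general-purpose registers as
+// integers, so F() and its setter overload reuse the X()/SetReg path and merely reinterpret
+// the bits with BitCast; no separate floating-point register file exists on the hardware.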
IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; @@ -32,6 +40,14 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { + return ir.BitCast(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { + return ir.BitCast(GetReg39(insn)); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -49,6 +65,10 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } +IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { + return ir.BitCast(GetCbuf(insn)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ef6d977fe..b701605d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -296,12 +296,18 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + void F(IR::Reg dest_reg, const IR::F32& value); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetReg20F(u64 insn); + [[nodiscard]] IR::F32 GetReg39F(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetCbufF(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 23512db1a..de65173e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -5,22 +5,23 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { enum class InterpolationMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, + Pass, + Multiply, + Constant, + Sc, }; enum class SampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, + Default, + Centroid, + Offset, }; } // Anonymous namespace @@ -54,12 +55,12 @@ void TranslatorVisitor::IPA(u64 insn) { } const IR::Attribute attribute{ipa.attribute}; - IR::U32 value{ir.GetAttribute(attribute)}; + IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; const bool is_perspective{false}; if (is_perspective) { - const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; value = ir.FPMul(value, rcp_position_w); } } @@ -68,7 +69,7 @@ void TranslatorVisitor::IPA(u64 insn) { case InterpolationMode::Pass: break; case InterpolationMode::Multiply: - value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + value = ir.FPMul(value, F(ipa.multiplier)); break; case InterpolationMode::Constant: throw NotImplementedException("IPA.CONSTANT"); @@ -86,7 +87,7 @@ void TranslatorVisitor::IPA(u64 insn) { value = ir.FPSaturate(value); } - ir.SetReg(ipa.dest_reg, value); + F(ipa.dest_reg, value); } 
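When a generic attribute is flagged as perspective-interpolated, IPA multiplies the interpolated value by the reciprocal of the interpolated W component, as the code above does with FPRecip and FPMul. A standalone sketch of that fix-up's shape (the function and variable names here are illustrative, not the emulator's API):

    #include <cassert>

    // value = value * (1 / position_w), mirroring FPMul(value, FPRecip(position_w)).
    float PerspectiveFixup(float value, float position_w) {
        const float rcp_position_w{1.0f / position_w};
        return value * rcp_position_w;
    }

    int main() {
        // An attribute that arrives pre-scaled by w (2.0f * 4.0f) is recovered.
        assert(PerspectiveFixup(8.0f, 4.0f) == 2.0f);
    }
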
} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index c9669c617..9f1570479 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } @@ -124,7 +124,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 02f5b653d..7fb3192d8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -5,6 +5,7 @@ #include #include +#include "common/bit_cast.h" #include "common/bit_util.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" @@ -25,6 +26,8 @@ template return value.U1(); } else if constexpr (std::is_same_v) { return value.U32(); + } else if constexpr (std::is_same_v) { + return value.F32(); } else if constexpr (std::is_same_v) { return value.U64(); } @@ -115,6 +118,19 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +template +void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{Common::BitCast(Arg(value))}); + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (value.InstRecursive()->Opcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -123,6 +139,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::BitCastF32U32: + return FoldBitCast(inst, IR::Opcode::BitCastU32F32); + case IR::Opcode::BitCastU32F32: + return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(inst); case IR::Opcode::BitFieldUExtract: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index ee69a5c9d..34393e1d5 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -108,8 +108,8 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce storage_buffer.offset < bias.offset_end; } -/// Ignores a global memory operation, reads return zero and writes are ignored -void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +/// Discards a global memory operation, reads return zero and writes are ignored +void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { const IR::Value zero{u32{0}}; switch (inst->Opcode()) { case IR::Opcode::LoadGlobalS8: @@ -120,12 +120,12 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { 
inst->ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith( - IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); + inst->ReplaceUsesWith(IR::Value{ + &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})}); break; case IR::Opcode::LoadGlobal128: inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); + inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -137,7 +137,8 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { inst->Invalidate(); break; default: - throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst->Opcode()); } } @@ -196,7 +197,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer = Track(addr, nullptr); if (!storage_buffer) { // If that also failed, drop the global memory usage - IgnoreGlobalMemory(block, inst); + DiscardGlobalMemory(block, inst); } } // Collect storage buffer and the instruction @@ -242,12 +243,12 @@ std::optional TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { if (vector.IsImmediate()) { return std::nullopt; } - // This vector is expected to be a CompositeConstruct2 + // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) { + if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } - // Grab the first argument from the CompositeConstruct2, this is the low address. + // Grab the first argument from the CompositeConstructU32x2, this is the low address. // Re-apply the offset in case we found one. const IR::U32 low_addr{vector_inst->Arg(0)}; return imm_offset != 0 ? 
IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 4022c6fe2..e6596d828 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -52,7 +52,7 @@ int main() { // RunDatabase(); // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; auto cfg{std::make_unique(env, 0)}; // fmt::print(stdout, "{}\n", cfg->Dot()); From 16cb00c521cae6e93ec49d10e15b575b7bc4857e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 23:11:23 -0300 Subject: [PATCH 008/770] shader: Add pools and rename files --- src/shader_recompiler/CMakeLists.txt | 14 +-- .../backend/spirv/emit_spirv.h | 21 +++++ .../frontend/ir/basic_block.cpp | 5 +- .../frontend/ir/basic_block.h | 11 +-- src/shader_recompiler/frontend/ir/function.h | 12 +-- .../frontend/ir/microinstruction.h | 2 +- .../frontend/ir/{opcode.cpp => opcodes.cpp} | 4 +- .../frontend/ir/{opcode.h => opcodes.h} | 2 +- .../frontend/ir/{opcode.inc => opcodes.inc} | 0 src/shader_recompiler/frontend/ir/program.cpp | 38 ++++++++ src/shader_recompiler/frontend/ir/program.h | 21 +++++ src/shader_recompiler/frontend/ir/value.cpp | 2 +- .../frontend/maxwell/control_flow.h | 2 +- .../frontend/maxwell/decode.cpp | 2 +- .../frontend/maxwell/decode.h | 2 +- .../maxwell/{opcode.cpp => opcodes.cpp} | 2 +- .../frontend/maxwell/{opcode.h => opcodes.h} | 0 .../frontend/maxwell/program.cpp | 49 ++++------ .../frontend/maxwell/program.h | 22 ++--- .../floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_multi_function.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 2 +- .../translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/move_register.cpp | 2 +- .../translate/impl/not_implemented.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 5 +- .../frontend/maxwell/translate/translate.h | 7 +- .../ir_opt/ssa_rewrite_pass.cpp | 28 +++--- src/shader_recompiler/main.cpp | 11 ++- src/shader_recompiler/object_pool.h | 89 +++++++++++++++++++ 30 files changed, 255 insertions(+), 108 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.h rename src/shader_recompiler/frontend/ir/{opcode.cpp => opcodes.cpp} (96%) rename src/shader_recompiler/frontend/ir/{opcode.h => opcodes.h} (97%) rename src/shader_recompiler/frontend/ir/{opcode.inc => opcodes.inc} (100%) create mode 100644 src/shader_recompiler/frontend/ir/program.cpp create mode 100644 src/shader_recompiler/frontend/ir/program.h rename src/shader_recompiler/frontend/maxwell/{opcode.cpp => opcodes.cpp} (92%) rename src/shader_recompiler/frontend/maxwell/{opcode.h => opcodes.h} (100%) create mode 100644 src/shader_recompiler/object_pool.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 72d5f41d2..248e90d4b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_executable(shader_recompiler + backend/spirv/emit_spirv.h environment.h exception.h file_environment.cpp @@ -17,10 +18,12 @@ add_executable(shader_recompiler frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h - frontend/ir/opcode.cpp - frontend/ir/opcode.h - frontend/ir/opcode.inc + frontend/ir/opcodes.cpp + frontend/ir/opcodes.h + frontend/ir/opcodes.inc frontend/ir/pred.h + 
frontend/ir/program.cpp + frontend/ir/program.h frontend/ir/reg.h frontend/ir/type.cpp frontend/ir/type.h @@ -33,8 +36,8 @@ add_executable(shader_recompiler frontend/maxwell/instruction.h frontend/maxwell/location.h frontend/maxwell/maxwell.inc - frontend/maxwell/opcode.cpp - frontend/maxwell/opcode.h + frontend/maxwell/opcodes.cpp + frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h frontend/maxwell/termination_code.cpp @@ -67,6 +70,7 @@ add_executable(shader_recompiler ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp main.cpp + object_pool.h ) target_link_libraries(shader_recompiler PRIVATE fmt::fmt) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..99cc8e08a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { + +class EmitSPIRV { +public: +private: + // Microinstruction emitters +#define OPCODE(name, result_type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 249251dd0..1a5d82135 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -14,7 +14,8 @@ namespace Shader::IR { -Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool& inst_pool_, u32 begin, u32 end) + : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} Block::~Block() = default; @@ -24,7 +25,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u64 flags) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; + Inst* const inst{inst_pool->Create(op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec4a41cb1..ec3ad6263 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -10,9 +10,9 @@ #include #include -#include #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -25,7 +25,7 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(u32 begin, u32 end); + explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); ~Block(); Block(const Block&) = delete; @@ -119,6 +119,8 @@ public: } private: + /// Memory pool for instruction list + ObjectPool* inst_pool; /// Starting location of this block u32 location_begin; /// End location of this block @@ -127,11 +129,6 @@ private: /// List of instructions in this block InstructionList instructions; - /// Memory pool for instruction list - boost::fast_pool_allocator - 
instruction_alloc_pool; - /// Block immediate predecessors std::vector imm_predecessors; }; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index 2d4dc5b98..bba7d1d39 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -4,22 +4,14 @@ #pragma once -#include -#include +#include #include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { struct Function { - struct InplaceDelete { - void operator()(IR::Block* block) const noexcept { - std::destroy_at(block); - } - }; - using UniqueBlock = std::unique_ptr; - - std::vector blocks; + boost::container::small_vector blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 22101c9e2..80baffb2e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -13,7 +13,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/opcode.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp similarity index 96% rename from src/shader_recompiler/frontend/ir/opcode.cpp rename to src/shader_recompiler/frontend/ir/opcodes.cpp index 65d074029..1f188411a 100644 --- a/src/shader_recompiler/frontend/ir/opcode.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -7,7 +7,7 @@ #include #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" namespace Shader::IR { namespace { @@ -26,7 +26,7 @@ constexpr std::array META_TABLE{ .type{type_token}, \ .arg_types{__VA_ARGS__}, \ }, -#include "opcode.inc" +#include "opcodes.inc" #undef OPCODE }; diff --git a/src/shader_recompiler/frontend/ir/opcode.h b/src/shader_recompiler/frontend/ir/opcodes.h similarity index 97% rename from src/shader_recompiler/frontend/ir/opcode.h rename to src/shader_recompiler/frontend/ir/opcodes.h index 1f4440379..999fb2e77 100644 --- a/src/shader_recompiler/frontend/ir/opcode.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -14,7 +14,7 @@ namespace Shader::IR { enum class Opcode { #define OPCODE(name, ...) name, -#include "opcode.inc" +#include "opcodes.inc" #undef OPCODE }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcodes.inc similarity index 100% rename from src/shader_recompiler/frontend/ir/opcode.inc rename to src/shader_recompiler/frontend/ir/opcodes.inc diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..0ce99ef2a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <map>
+#include <string>
+
+#include <fmt/format.h>
+
+#include "shader_recompiler/frontend/ir/function.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::IR {
+
+std::string DumpProgram(const Program& program) {
+    size_t index{0};
+    std::map<const IR::Inst*, size_t> inst_to_index;
+    std::map<const IR::Block*, size_t> block_to_index;
+
+    for (const IR::Function& function : program.functions) {
+        for (const IR::Block* const block : function.blocks) {
+            block_to_index.emplace(block, index);
+            ++index;
+        }
+    }
+    std::string ret;
+    for (const IR::Function& function : program.functions) {
+        ret += fmt::format("Function\n");
+        for (const auto& block : function.blocks) {
+            ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+        }
+    }
+    return ret;
+}
+
+} // namespace Shader::IR
\ No newline at end of file
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..efaf1aa1e
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,21 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/function.h"
+
+namespace Shader::IR {
+
+struct Program {
+    boost::container::small_vector<Function, 1> functions;
+};
+
+[[nodiscard]] std::string DumpProgram(const Program& program);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 93ff8ccf1..9ea61813b 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/frontend/ir/microinstruction.h"
-#include "shader_recompiler/frontend/ir/opcode.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::IR {
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
index 20ada8afd..49b369282 100644
--- a/src/shader_recompiler/frontend/maxwell/control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -16,7 +16,7 @@
 #include "shader_recompiler/frontend/ir/condition.h"
 #include "shader_recompiler/frontend/maxwell/instruction.h"
 #include "shader_recompiler/frontend/maxwell/location.h"
-#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
 
 namespace Shader::Maxwell::Flow {
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
index ab1cc6c8d..bd85afa1e 100644
--- a/src/shader_recompiler/frontend/maxwell/decode.cpp
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -11,7 +11,7 @@
 #include "common/common_types.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/maxwell/decode.h"
-#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
 
 namespace Shader::Maxwell {
 namespace {
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
index 2a3dd28e8..b4f080fd7 100644
--- a/src/shader_recompiler/frontend/maxwell/decode.h
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -5,7 +5,7 @@
 #pragma once
 
 #include "common/common_types.h"
-#include "shader_recompiler/frontend/maxwell/opcode.h"
+#include
"shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp similarity index 92% rename from src/shader_recompiler/frontend/maxwell/opcode.cpp rename to src/shader_recompiler/frontend/maxwell/opcodes.cpp index 8a7bdb611..12ddf2ac9 100644 --- a/src/shader_recompiler/frontend/maxwell/opcode.cpp +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { namespace { diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcodes.h similarity index 100% rename from src/shader_recompiler/frontend/maxwell/opcode.h rename to src/shader_recompiler/frontend/maxwell/opcodes.h diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b3f2de852..8cdd20804 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -5,6 +5,7 @@ #include #include +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -12,17 +13,18 @@ namespace Shader::Maxwell { namespace { -void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map, IR::Block* block_memory) { +void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span block_map) { const size_t num_blocks{cfg_function.blocks.size()}; function.blocks.reserve(num_blocks); for (const Flow::BlockId block_id : cfg_function.blocks) { const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); - block_map[flow_block.id] = function.blocks.back().get(); - ++block_memory; + IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; + block_map[flow_block.id] = ir_block; + function.blocks.emplace_back(ir_block); } } @@ -34,21 +36,24 @@ void EmitTerminationInsts(const Flow::Function& cfg_function, } } -void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - IR::Block* block_memory) { +void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, + IR::Function& function) { std::vector block_map; block_map.resize(cfg_function.blocks_data.size()); - TranslateCode(env, cfg_function, function, block_map, block_memory); + TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); EmitTerminationInsts(cfg_function, block_map); } } // Anonymous namespace -Program::Program(Environment& env, const Flow::CFG& cfg) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::CFG& cfg) { + IR::Program program; + auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(env, cfg_function, functions.emplace_back(), - 
block_alloc_pool.allocate(cfg_function.blocks.size())); + TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { @@ -59,27 +64,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::VerificationPass(function); } //*/ -} - -std::string DumpProgram(const Program& program) { - size_t index{0}; - std::map inst_to_index; - std::map block_to_index; - - for (const IR::Function& function : program.functions) { - for (const auto& block : function.blocks) { - block_to_index.emplace(block.get(), index); - ++index; - } - } - std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } - } - return ret; + return program; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 36e678a9e..3355ab129 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -9,28 +9,16 @@ #include #include -#include #include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -class Program { - friend std::string DumpProgram(const Program& program); - -public: - explicit Program(Environment& env, const Flow::CFG& cfg); - -private: - boost::pool_allocator - block_alloc_pool; - boost::container::small_vector functions; -}; - -[[nodiscard]] std::string DumpProgram(const Program& program); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + const Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index acd8445ad..3d0c48457 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index 90cddb18b..ba005fbf4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include 
"shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index de65173e8..ad97786d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -6,7 +6,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 9f1570479..727524284 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1711d3f48..1f83d1068 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d70399f6b..1bb160acb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 66a306745..dcc3f6c0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,8 +23,9 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(Environment& env, const Flow::Block& flow_block) { - IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()}; +IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block) { + IR::Block block{inst_pool, flow_block.begin.Offset(), 
flow_block.end.Offset()}; TranslatorVisitor visitor{env, block}; const Location pc_end{flow_block.end}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index 788742dea..c1c21b278 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,11 +6,14 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block); +[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a62d3f56b..7713e3ba9 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -19,7 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" #include "shader_recompiler/ir_opt/passes.h" @@ -150,52 +150,52 @@ private: void SsaRewritePass(IR::Function& function) { Pass pass; - for (const auto& block : function.blocks) { + for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { switch (inst.Opcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block.get(), inst.Arg(1)); + pass.WriteVariable(reg, block, inst.Arg(1)); } break; case IR::Opcode::SetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block.get(), inst.Arg(1)); + pass.WriteVariable(pred, block, inst.Arg(1)); } break; case IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); } break; case IR::Opcode::GetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; case IR::Opcode::GetZFlag: - 
inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+            inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
             break;
         case IR::Opcode::GetSFlag:
-            inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+            inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
             break;
         case IR::Opcode::GetCFlag:
-            inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+            inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
             break;
         case IR::Opcode::GetOFlag:
-            inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+            inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
             break;
         default:
             break;
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index e6596d828..19e36590c 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -56,6 +56,13 @@ int main() {
     auto cfg{std::make_unique<Flow::CFG>(env, 0)};
     // fmt::print(stdout, "{}\n", cfg->Dot());
 
-    Program program{env, *cfg};
-    fmt::print(stdout, "{}\n", DumpProgram(program));
+    auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()};
+    auto block_pool{std::make_unique<ObjectPool<IR::Block>>()};
+
+    for (int i = 0; i < 8192 * 4; ++i) {
+        void(inst_pool->Create(IR::Opcode::Void, 0));
+    }
+
+    IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)};
+    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
 }
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
new file mode 100644
index 000000000..7c65bbd92
--- /dev/null
+++ b/src/shader_recompiler/object_pool.h
@@ -0,0 +1,89 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <type_traits>
+
+namespace Shader {
+
+template <typename T, size_t chunk_size = 8192>
+requires std::is_destructible_v<T> class ObjectPool {
+public:
+    ~ObjectPool() {
+        std::unique_ptr<Chunk> tree_owner;
+        Chunk* chunk{&root};
+        while (chunk) {
+            for (size_t obj_id = chunk->free_objects; obj_id < chunk_size; ++obj_id) {
+                chunk->storage[obj_id].object.~T();
+            }
+            tree_owner = std::move(chunk->next);
+            chunk = tree_owner.get();
+        }
+    }
+
+    template <typename... Args>
+    requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) {
+        return std::construct_at(Memory(), std::forward<Args>(args)...);
+    }
+
+    void ReleaseContents() {
+        Chunk* chunk{&root};
+        if (chunk) {
+            const size_t free_objects{chunk->free_objects};
+            if (free_objects == chunk_size) {
+                break;
+            }
+            chunk->free_objects = chunk_size;
+            for (size_t obj_id = free_objects; obj_id < chunk_size; ++obj_id) {
+                chunk->storage[obj_id].object.~T();
+            }
+            chunk = chunk->next.get();
+        }
+        node = &root;
+    }
+
+private:
+    struct NonTrivialDummy {
+        NonTrivialDummy() noexcept {}
+    };
+
+    union Storage {
+        Storage() noexcept {}
+        ~Storage() noexcept {}
+
+        NonTrivialDummy dummy{};
+        T object;
+    };
+
+    struct Chunk {
+        size_t free_objects = chunk_size;
+        std::array<Storage, chunk_size> storage;
+        std::unique_ptr<Chunk> next;
+    };
+
+    [[nodiscard]] T* Memory() {
+        Chunk* const chunk{FreeChunk()};
+        return &chunk->storage[--chunk->free_objects].object;
+    }
+
+    [[nodiscard]] Chunk* FreeChunk() {
+        if (node->free_objects > 0) {
+            return node;
+        }
+        if (node->next) {
+            node = node->next.get();
+            return node;
+        }
+        node->next = std::make_unique<Chunk>();
+        node = node->next.get();
+        return node;
+    }
+
+    Chunk* node{&root};
+    Chunk root;
+};
+
+} // namespace Shader

From da8096e6e35af250dcc56a1af76b8a211df63a90 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 6 Feb 2021 04:47:53 -0300
Subject: [PATCH 009/770] shader: Properly store phi on Inst

---
 .../frontend/ir/basic_block.cpp               |  33 +++---
 .../frontend/ir/microinstruction.cpp          | 102 +++++++++++++-----
 .../frontend/ir/microinstruction.h            |  37 +++++--
 src/shader_recompiler/frontend/ir/opcodes.inc |   2 +-
 .../ir_opt/ssa_rewrite_pass.cpp               |  22 ++--
 src/shader_recompiler/object_pool.h           |  11 +-
 6 files changed, 132 insertions(+), 75 deletions(-)

diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index 1a5d82135..50c6a83cd 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -129,26 +129,21 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     } else {
         ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
     }
-    if (op == Opcode::Phi) {
-        size_t val_index{0};
-        for (const auto& [phi_block, phi_val] : inst.PhiOperands()) {
-            ret += val_index != 0 ? ", " : " ";
-            ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val),
-                               BlockToIndex(block_to_index, phi_block));
-            ++val_index;
+    const size_t arg_count{NumArgsOf(op)};
+    for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
+        const Value arg{inst.Arg(arg_index)};
+        const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)};
+        ret += arg_index != 0 ? ", " : " ";
+        if (op == Opcode::Phi) {
+            ret += fmt::format("[ {}, {} ]", arg_index,
+                               BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
+        } else {
+            ret += arg_str;
         }
-    } else {
-        const size_t arg_count{NumArgsOf(op)};
-        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
-            const Value arg{inst.Arg(arg_index)};
-            ret += arg_index != 0 ?
", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); - } + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index de953838c..e7ca92039 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" @@ -30,6 +31,22 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } +Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: @@ -71,7 +88,10 @@ bool Inst::IsPseudoInstruction() const noexcept { } } -bool Inst::AreAllArgsImmediates() const noexcept { +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } return std::all_of(args.begin(), args.begin() + NumArgs(), [](const IR::Value& value) { return value.IsImmediate(); }); } @@ -101,7 +121,7 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } size_t Inst::NumArgs() const { - return NumArgsOf(op); + return op == Opcode::Phi ? 
phi_args.size() : NumArgsOf(op); } IR::Type Inst::Type() const { @@ -109,13 +129,23 @@ IR::Type Inst::Type() const { } Value Inst::Arg(size_t index) const { - if (index >= NumArgsOf(op)) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + if (op == Opcode::Phi) { + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); + } + return phi_args[index].second; + } else { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + return args[index]; } - return args[index]; } void Inst::SetArg(size_t index, Value value) { + if (op == Opcode::Phi) { + throw LogicError("Setting argument on a phi instruction"); + } if (index >= NumArgsOf(op)) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } @@ -128,15 +158,21 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } -std::span> Inst::PhiOperands() const noexcept { - return phi_operands; +Block* Inst::PhiBlock(size_t index) const { + if (op != Opcode::Phi) { + throw LogicError("{} is not a Phi instruction", op); + } + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction"); + } + return phi_args[index].first; } void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } - phi_operands.emplace_back(predecessor, value); + phi_args.emplace_back(predecessor, value); } void Inst::Invalidate() { @@ -145,18 +181,22 @@ void Inst::Invalidate() { } void Inst::ClearArgs() { - for (auto& value : args) { - if (!value.IsImmediate()) { - UndoUse(value); + if (op == Opcode::Phi) { + for (auto& pair : phi_args) { + IR::Value& value{pair.second}; + if (!value.IsImmediate()) { + UndoUse(value); + } } - value = {}; - } - for (auto& [phi_block, phi_op] : phi_operands) { - if (!phi_op.IsImmediate()) { - UndoUse(phi_op); + phi_args.clear(); + } else { + for (auto& value : args) { + if (!value.IsImmediate()) { + UndoUse(value); + } + value = {}; } } - phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { @@ -167,24 +207,29 @@ void Inst::ReplaceUsesWith(Value replacement) { if (!replacement.IsImmediate()) { Use(replacement); } - args[0] = replacement; + if (op == Opcode::Phi) { + phi_args[0].second = replacement; + } else { + args[0] = replacement; + } } void Inst::Use(const Value& value) { - ++value.Inst()->use_count; + Inst* const inst{value.Inst()}; + ++inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(value.Inst()->zero_inst, this); + SetPseudoInstruction(inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(value.Inst()->sign_inst, this); + SetPseudoInstruction(inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(value.Inst()->carry_inst, this); + SetPseudoInstruction(inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(value.Inst()->overflow_inst, this); + SetPseudoInstruction(inst->overflow_inst, this); break; default: break; @@ -192,20 +237,21 @@ void Inst::Use(const Value& value) { } void Inst::UndoUse(const Value& value) { - --value.Inst()->use_count; + Inst* const inst{value.Inst()}; + --inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp); + RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); break; case 
Opcode::GetSignFromOp: - RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp); + RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp); + RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); + RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 80baffb2e..ddf0f90a9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -6,8 +6,8 @@ #include #include -#include #include +#include #include #include @@ -25,7 +25,14 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} + explicit Inst(Opcode op_, u64 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -50,26 +57,26 @@ public: [[nodiscard]] bool IsPseudoInstruction() const noexcept; /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const noexcept; + [[nodiscard]] bool AreAllArgsImmediates() const; /// Determines if there is a pseudo-operation associated with this instruction. [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const; - /// Get the type this instruction returns. [[nodiscard]] IR::Type Type() const; + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const; + /// Get the value of a given argument index. [[nodiscard]] Value Arg(size_t index) const; /// Set the value of a given argument index. void SetArg(size_t index, Value value); - /// Get an immutable span to the phi operands. - [[nodiscard]] std::span> PhiOperands() const noexcept; + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; /// Add phi operand to a phi instruction. 
void AddPhiOperand(Block* predecessor, const Value& value); @@ -87,18 +94,26 @@ public: } private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + void Use(const Value& value); void UndoUse(const Value& value); IR::Opcode op{}; int use_count{}; - std::array args{}; + u64 flags{}; + union { + NonTriviallyDummy dummy{}; + std::array args; + std::vector> phi_args; + }; Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; - std::vector> phi_operands; - u64 flags{}; }; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6eb105d92..82b04f37c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -3,9 +3,9 @@ // Refer to the license.txt file included. // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Phi, Opaque, ) OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7713e3ba9..15a9db90a 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -104,32 +104,34 @@ private: val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi - val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + val = IR::Value{&phi_inst}; WriteVariable(variable, block, val); - val = AddPhiOperands(variable, val, block); + val = AddPhiOperands(variable, phi_inst, block); } WriteVariable(variable, block, val); return val; } - IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const pred : block->ImmediatePredecessors()) { - phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + phi.AddPhiOperand(pred, ReadVariable(variable, pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } - IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { IR::Value same; - for (const auto& pair : phi.Inst()->PhiOperands()) { - const IR::Value& op{pair.second}; - if (op == same || op == phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { + const IR::Value& op{phi.Arg(arg_index)}; + if (op == same || op == IR::Value{&phi}) { // Unique value or self-reference continue; } if (!same.IsEmpty()) { // The phi merges at least two values: not trivial - return phi; + return IR::Value{&phi}; } same = op; } @@ -139,7 +141,7 @@ private: same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; } // Reroute all uses of phi to same and remove phi - phi.Inst()->ReplaceUsesWith(same); + phi.ReplaceUsesWith(same); // TODO: Try to recursively remove all phi users, which might have become trivial return same; } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 7c65bbd92..a573add32 100644 --- a/src/shader_recompiler/object_pool.h +++ 
b/src/shader_recompiler/object_pool.h @@ -6,6 +6,7 @@ #include #include +#include namespace Shader { @@ -31,14 +32,12 @@ public: void ReleaseContents() { Chunk* chunk{&root}; - if (chunk) { - const size_t free_objects{chunk->free_objects}; - if (free_objects == chunk_size) { + while (chunk) { + if (chunk->free_objects == chunk_size) { break; } - chunk->free_objects = chunk_size; - for (size_t obj_id = free_objects; obj_id < chunk_size; ++obj_id) { - chunk->storage[obj_id].object.~T(); + for (; chunk->free_objects < chunk_size; ++chunk->free_objects) { + chunk->storage[chunk->free_objects].object.~T(); } chunk = chunk->next.get(); } From 6dafb08f52ac78119669a698c4b9a39bffd48f8f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Feb 2021 04:47:53 -0300 Subject: [PATCH 010/770] shader: Better constant folding --- .../frontend/ir/basic_block.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 59 +++++++++++++++---- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 50c6a83cd..da33ff6f1 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -87,7 +87,7 @@ static std::string ArgToIndex(const std::map& block_to_ind } switch (arg.Type()) { case Type::U1: - return fmt::format("#{}", arg.U1() ? '1' : '0'); + return fmt::format("#{}", arg.U1() ? "true" : "false"); case Type::U8: return fmt::format("#{}", arg.U8()); case Type::U16: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7fb3192d8..f1170c61e 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include "common/bit_cast.h" @@ -13,12 +14,17 @@ namespace Shader::Optimization { namespace { -[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { - if (static_cast(shift) + static_cast(count) > Common::BitSize()) { - throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); - } - return (base >> shift) & ((1U << count) - 1); -} +// Metaprogramming stuff to get arguments information out of a lambda +template +struct LambdaTraits : LambdaTraits::operator())> {}; + +template +struct LambdaTraits { + template + using ArgType = std::tuple_element_t>; + + static constexpr size_t NUM_ARGS{sizeof...(Args)}; +}; template [[nodiscard]] T Arg(const IR::Value& value) { @@ -104,6 +110,14 @@ void FoldAdd(IR::Inst& inst) { } } +template +void FoldSelect(IR::Inst& inst) { + const IR::Value cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + inst.ReplaceUsesWith(cond.U1() ? 
inst.Arg(1) : inst.Arg(2));
+    }
+}
+
 void FoldLogicalAnd(IR::Inst& inst) {
     if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
         return;
@@ -131,6 +145,21 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
     }
 }
 
+template <typename Func, size_t... I>
+IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
+    using Traits = LambdaTraits<decltype(func)>;
+    return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
+}
+
+template <typename Func>
+void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+    if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+        return;
+    }
+    using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+    inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+}
+
 void ConstantPropagation(IR::Inst& inst) {
     switch (inst.Opcode()) {
     case IR::Opcode::GetRegister:
@@ -145,14 +174,20 @@ void ConstantPropagation(IR::Inst& inst) {
         return FoldBitCast(inst, IR::Opcode::BitCastF32U32);
     case IR::Opcode::IAdd64:
         return FoldAdd<u64>(inst);
-    case IR::Opcode::BitFieldUExtract:
-        if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
-            inst.ReplaceUsesWith(IR::Value{
-                BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
-        }
-        break;
+    case IR::Opcode::Select32:
+        return FoldSelect<u32>(inst);
     case IR::Opcode::LogicalAnd:
         return FoldLogicalAnd(inst);
+    case IR::Opcode::ULessThan:
+        return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
+    case IR::Opcode::BitFieldUExtract:
+        return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
+            if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
+                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
+                                 base, shift, count);
+            }
+            return (base >> shift) & ((1U << count) - 1);
+        });
     default:
         break;
     }

From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 8 Feb 2021 02:54:35 -0300
Subject: [PATCH 011/770] spirv: Initial SPIR-V support

---
 externals/sirit                               |    2 +-
 src/shader_recompiler/CMakeLists.txt          |   15 +-
 .../backend/spirv/emit_spirv.cpp              |  134 +
 .../backend/spirv/emit_spirv.h                |  314 +-
 .../spirv/emit_spirv_bitwise_conversion.cpp   |   57 +
 .../backend/spirv/emit_spirv_composite.cpp    |  105 +
 .../spirv/emit_spirv_context_get_set.cpp      |  102 +
 .../backend/spirv/emit_spirv_control_flow.cpp |   30 +
 .../spirv/emit_spirv_floating_point.cpp       |  220 ++
 .../backend/spirv/emit_spirv_integer.cpp      |  132 +
 .../backend/spirv/emit_spirv_logical.cpp      |   89 +
 .../backend/spirv/emit_spirv_memory.cpp       |  125 +
 .../backend/spirv/emit_spirv_select.cpp       |   25 +
 .../backend/spirv/emit_spirv_undefined.cpp    |   29 +
 .../frontend/ir/ir_emitter.cpp                |   12 +-
 src/shader_recompiler/frontend/ir/opcodes.inc |   12 +-
 .../frontend/maxwell/translate/translate.cpp  |   10 +-
 .../ir_opt/identity_removal_pass.cpp          |    2 +-
 src/shader_recompiler/main.cpp                |   21 +-
 .../renderer_vulkan/vk_shader_decompiler.cpp  | 3166 -----------------
 .../renderer_vulkan/vk_shader_decompiler.h    |   99 -
 21 files changed, 1401 insertions(+), 3300 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
 create mode 100644
src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_select.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.h diff --git a/externals/sirit b/externals/sirit index eefca56af..1f7b70730 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit eefca56afd49379bdebc97ded8b480839f930881 +Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35 diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 248e90d4b..12fbcb37c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,5 +1,16 @@ add_executable(shader_recompiler + backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_bitwise_conversion.cpp + backend/spirv/emit_spirv_composite.cpp + backend/spirv/emit_spirv_context_get_set.cpp + backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_integer.cpp + backend/spirv/emit_spirv_logical.cpp + backend/spirv/emit_spirv_memory.cpp + backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_undefined.cpp environment.h exception.h file_environment.cpp @@ -72,7 +83,9 @@ add_executable(shader_recompiler main.cpp object_pool.h ) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt) + +target_include_directories(video_core PRIVATE sirit) +target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) if (MSVC) target_compile_options(shader_recompiler PRIVATE diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..7c4269fad --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
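// [Editorial aside - not part of the patch] The EmitInst/Invoke machinery
// defined further down in this file selects a call signature for every opcode
// at compile time. A minimal, self-contained sketch of the same pattern; all
// names below are illustrative and do not come from the patch:
#include <type_traits>

struct SketchEmitter {
    int Nullary() { return 1; }
    int Unary(int a) { return a + 1; }
    void NoResult(int) {}
};

template <auto method>
int SketchDispatch(SketchEmitter& emit, int arg) {
    using M = decltype(method);
    if constexpr (std::is_invocable_r_v<int, M, SketchEmitter&>) {
        return (emit.*method)(); // e.g. &SketchEmitter::Nullary
    } else if constexpr (std::is_invocable_r_v<int, M, SketchEmitter&, int>) {
        return (emit.*method)(arg); // e.g. &SketchEmitter::Unary
    } else {
        (emit.*method)(arg); // emitters that produce no result id
        return 0;
    }
}
// Only the matching `if constexpr` branch is instantiated, so a member
// function is never called with arguments it cannot accept. [End aside]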
+
+#include <algorithm>
+#include <cstdio>
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/function.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/program.h"
+
+namespace Shader::Backend::SPIRV {
+
+EmitContext::EmitContext(IR::Program& program) {
+    AddCapability(spv::Capability::Shader);
+    AddCapability(spv::Capability::Float16);
+    AddCapability(spv::Capability::Float64);
+    void_id = TypeVoid();
+
+    u1 = Name(TypeBool(), "u1");
+    f32.Define(*this, TypeFloat(32), "f32");
+    u32.Define(*this, TypeInt(32, false), "u32");
+    f16.Define(*this, TypeFloat(16), "f16");
+    f64.Define(*this, TypeFloat(64), "f64");
+
+    for (const IR::Function& function : program.functions) {
+        for (IR::Block* const block : function.blocks) {
+            block_label_map.emplace_back(block, OpLabel());
+        }
+    }
+    std::ranges::sort(block_label_map, {}, &std::pair<IR::Block*, Id>::first);
+}
+
+EmitContext::~EmitContext() = default;
+
+EmitSPIRV::EmitSPIRV(IR::Program& program) {
+    EmitContext ctx{program};
+    const Id void_function{ctx.TypeFunction(ctx.void_id)};
+    // FIXME: Forward declare functions (needs sirit support)
+    Id func{};
+    for (IR::Function& function : program.functions) {
+        func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function);
+        for (IR::Block* const block : function.blocks) {
+            ctx.AddLabel(ctx.BlockLabel(block));
+            for (IR::Inst& inst : block->Instructions()) {
+                EmitInst(ctx, &inst);
+            }
+        }
+        ctx.OpFunctionEnd();
+    }
+    ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main");
+
+    std::vector<u32> result{ctx.Assemble()};
+    std::FILE* file{std::fopen("shader.spv", "wb")};
+    std::fwrite(result.data(), sizeof(u32), result.size(), file);
+    std::fclose(file);
+    std::system("spirv-dis shader.spv");
+    std::system("spirv-val shader.spv");
+}
+
+template <auto method>
+static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) {
+    using M = decltype(method);
+    using std::is_invocable_r_v;
+    if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&>) {
+        ctx.Define(inst, (emit.*method)(ctx));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id>) {
+        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0))));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id>) {
+        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id, Id>) {
+        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
+                                        ctx.Def(inst->Arg(2))));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id>) {
+        ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id, Id>) {
+        ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)),
+                                        ctx.Def(inst->Arg(2))));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, u32>) {
+        ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&>) {
+        ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0)));
+    } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&,
+                                          const IR::Value&>) {
+        ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0), inst->Arg(1)));
+    } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&, IR::Inst*>) {
+        (emit.*method)(ctx, inst);
+    } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&>) {
+        (emit.*method)(ctx);
+    } else {
+        static_assert(false, "Bad format");
+    }
+}
+
+void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) {
+    switch (inst->Opcode()) {
+#define OPCODE(name, result_type, ...)                                                            \
+    case IR::Opcode::name:                                                                        \
+        return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst);
+#include "shader_recompiler/frontend/ir/opcodes.inc"
+#undef OPCODE
+    }
+    throw LogicError("Invalid opcode {}", inst->Opcode());
+}
+
+void EmitSPIRV::EmitPhi(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitVoid(EmitContext&) {}
+
+void EmitSPIRV::EmitIdentity(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSPIRV::EmitGetSignFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 99cc8e08a..3f4b68a7d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -4,18 +4,326 @@
 
 #pragma once
 
+#include <boost/container/flat_map.hpp>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 
 namespace Shader::Backend::SPIRV {
 
+using Sirit::Id;
+
+class DefMap {
+public:
+    void Define(IR::Inst* inst, Id def_id) {
+        const InstInfo info{.use_count{inst->UseCount()}, .def_id{def_id}};
+        const auto it{map.insert(map.end(), std::make_pair(inst, info))};
+        if (it == map.end()) {
+            throw LogicError("Defining already defined instruction");
+        }
+    }
+
+    [[nodiscard]] Id Consume(IR::Inst* inst) {
+        const auto it{map.find(inst)};
+        if (it == map.end()) {
+            throw LogicError("Consuming undefined instruction");
+        }
+        const Id def_id{it->second.def_id};
+        if (--it->second.use_count == 0) {
+            map.erase(it);
+        }
+        return def_id;
+    }
+
+private:
+    struct InstInfo {
+        int use_count;
+        Id def_id;
+    };
+
+    boost::container::flat_map<IR::Inst*, InstInfo> map;
+};
+
+class VectorTypes {
+public:
+    void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
+        defs[0] = sirit_ctx.Name(base_type, name);
+
+        std::array<char, 32> def_name;
+        for (int i = 1; i < 4; ++i) {
+            const std::string_view def_name_view(
+                def_name.data(),
+                fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size);
+            defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view);
+        }
+    }
+
+    [[nodiscard]] Id operator[](size_t size) const noexcept {
+        return defs[size - 1];
+    }
+
+private:
+    std::array<Id, 4> defs;
+};
+
+class EmitContext final : public Sirit::Module {
+public:
+    explicit EmitContext(IR::Program& program);
+    ~EmitContext();
+
+    [[nodiscard]] Id Def(const IR::Value& value) {
+        if (!value.IsImmediate()) {
+            return def_map.Consume(value.Inst());
+        }
+        switch (value.Type()) {
+        case IR::Type::U32:
+            return Constant(u32[1], value.U32());
+        case IR::Type::F32:
+            return Constant(f32[1], value.F32());
+        default:
+            throw NotImplementedException("Immediate type {}", value.Type());
+        }
+    }
+
+    void Define(IR::Inst* inst, Id def_id) {
+        def_map.Define(inst, def_id);
+    }
+
+    [[nodiscard]] Id BlockLabel(IR::Block* block) const {
+        const auto it{std::ranges::lower_bound(block_label_map, block, {},
+                                               &std::pair<IR::Block*, Id>::first)};
+        if (it == block_label_map.end()) {
+            throw LogicError("Undefined block");
+        }
+        return it->second;
+    }
+
+    Id void_id{};
+    Id u1{};
+    VectorTypes f32;
+    VectorTypes u32;
+    VectorTypes f16;
+    VectorTypes f64;
+
+    Id workgroup_id{};
+    Id local_invocation_id{};
+
+private:
+    DefMap def_map;
+    std::vector<std::pair<IR::Block*, Id>> block_label_map;
+};
+
 class EmitSPIRV {
 public:
+    explicit EmitSPIRV(IR::Program& program);
+
 private:
+    void EmitInst(EmitContext& ctx, IR::Inst* inst);
+
     // Microinstruction emitters
-#define OPCODE(name, result_type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
-#include "shader_recompiler/frontend/ir/opcodes.inc"
-#undef OPCODE
+    void EmitPhi(EmitContext& ctx);
+    void EmitVoid(EmitContext& ctx);
+    void EmitIdentity(EmitContext& ctx);
+    void EmitBranch(EmitContext& ctx, IR::Inst* inst);
+    void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst);
+    void EmitExit(EmitContext& ctx);
+    void EmitReturn(EmitContext& ctx);
+    void EmitUnreachable(EmitContext& ctx);
+    void EmitGetRegister(EmitContext& ctx);
+    void EmitSetRegister(EmitContext& ctx);
+    void EmitGetPred(EmitContext& ctx);
+    void EmitSetPred(EmitContext& ctx);
+    Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+    void EmitGetAttribute(EmitContext& ctx);
+    void EmitSetAttribute(EmitContext& ctx);
+    void EmitGetAttributeIndexed(EmitContext& ctx);
+    void EmitSetAttributeIndexed(EmitContext& ctx);
+    void EmitGetZFlag(EmitContext& ctx);
+    void EmitGetSFlag(EmitContext& ctx);
+    void EmitGetCFlag(EmitContext& ctx);
+    void EmitGetOFlag(EmitContext& ctx);
+    void EmitSetZFlag(EmitContext& ctx);
+    void EmitSetSFlag(EmitContext& ctx);
+    void EmitSetCFlag(EmitContext& ctx);
+    void EmitSetOFlag(EmitContext& ctx);
+    Id EmitWorkgroupId(EmitContext& ctx);
+    Id EmitLocalInvocationId(EmitContext& ctx);
+    void EmitUndef1(EmitContext& ctx);
+    void EmitUndef8(EmitContext& ctx);
+    void EmitUndef16(EmitContext& ctx);
+    void EmitUndef32(EmitContext& ctx);
+    void EmitUndef64(EmitContext& ctx);
+    void EmitLoadGlobalU8(EmitContext& ctx);
+    void EmitLoadGlobalS8(EmitContext& ctx);
+    void EmitLoadGlobalU16(EmitContext& ctx);
+    void EmitLoadGlobalS16(EmitContext& ctx);
+    void EmitLoadGlobal32(EmitContext& ctx);
+    void EmitLoadGlobal64(EmitContext& ctx);
+    void EmitLoadGlobal128(EmitContext& ctx);
+    void EmitWriteGlobalU8(EmitContext& ctx);
+    void EmitWriteGlobalS8(EmitContext& ctx);
+    void EmitWriteGlobalU16(EmitContext& ctx);
+    void EmitWriteGlobalS16(EmitContext& ctx);
+    void EmitWriteGlobal32(EmitContext& ctx);
+    void EmitWriteGlobal64(EmitContext& ctx);
+    void EmitWriteGlobal128(EmitContext& ctx);
+    void EmitLoadStorageU8(EmitContext& ctx);
+    void EmitLoadStorageS8(EmitContext& ctx);
+    void EmitLoadStorageU16(EmitContext& ctx);
+    void EmitLoadStorageS16(EmitContext& ctx);
+    Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+    void EmitLoadStorage64(EmitContext& ctx);
+    void EmitLoadStorage128(EmitContext& ctx);
+    void EmitWriteStorageU8(EmitContext& ctx);
+    void EmitWriteStorageS8(EmitContext& ctx);
+    void EmitWriteStorageU16(EmitContext& ctx);
+    void EmitWriteStorageS16(EmitContext& ctx);
+    void EmitWriteStorage32(EmitContext& ctx);
+    void EmitWriteStorage64(EmitContext& ctx);
+    void EmitWriteStorage128(EmitContext& ctx);
+    void EmitCompositeConstructU32x2(EmitContext& ctx);
+    void EmitCompositeConstructU32x3(EmitContext& ctx);
+    void EmitCompositeConstructU32x4(EmitContext& ctx);
+    void EmitCompositeExtractU32x2(EmitContext& ctx);
+    Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index);
+    void EmitCompositeExtractU32x4(EmitContext& ctx);
+    void EmitCompositeConstructF16x2(EmitContext& 
ctx); + void EmitCompositeConstructF16x3(EmitContext& ctx); + void EmitCompositeConstructF16x4(EmitContext& ctx); + void EmitCompositeExtractF16x2(EmitContext& ctx); + void EmitCompositeExtractF16x3(EmitContext& ctx); + void EmitCompositeExtractF16x4(EmitContext& ctx); + void EmitCompositeConstructF32x2(EmitContext& ctx); + void EmitCompositeConstructF32x3(EmitContext& ctx); + void EmitCompositeConstructF32x4(EmitContext& ctx); + void EmitCompositeExtractF32x2(EmitContext& ctx); + void EmitCompositeExtractF32x3(EmitContext& ctx); + void EmitCompositeExtractF32x4(EmitContext& ctx); + void EmitCompositeConstructF64x2(EmitContext& ctx); + void EmitCompositeConstructF64x3(EmitContext& ctx); + void EmitCompositeConstructF64x4(EmitContext& ctx); + void EmitCompositeExtractF64x2(EmitContext& ctx); + void EmitCompositeExtractF64x3(EmitContext& ctx); + void EmitCompositeExtractF64x4(EmitContext& ctx); + void EmitSelect8(EmitContext& ctx); + void EmitSelect16(EmitContext& ctx); + void EmitSelect32(EmitContext& ctx); + void EmitSelect64(EmitContext& ctx); + void EmitBitCastU16F16(EmitContext& ctx); + Id EmitBitCastU32F32(EmitContext& ctx, Id value); + void EmitBitCastU64F64(EmitContext& ctx); + void EmitBitCastF16U16(EmitContext& ctx); + Id EmitBitCastF32U32(EmitContext& ctx, Id value); + void EmitBitCastF64U64(EmitContext& ctx); + void EmitPackUint2x32(EmitContext& ctx); + void EmitUnpackUint2x32(EmitContext& ctx); + void EmitPackFloat2x16(EmitContext& ctx); + void EmitUnpackFloat2x16(EmitContext& ctx); + void EmitPackDouble2x32(EmitContext& ctx); + void EmitUnpackDouble2x32(EmitContext& ctx); + void EmitGetZeroFromOp(EmitContext& ctx); + void EmitGetSignFromOp(EmitContext& ctx); + void EmitGetCarryFromOp(EmitContext& ctx); + void EmitGetOverflowFromOp(EmitContext& ctx); + void EmitFPAbs16(EmitContext& ctx); + void EmitFPAbs32(EmitContext& ctx); + void EmitFPAbs64(EmitContext& ctx); + Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + void EmitFPMax32(EmitContext& ctx); + void EmitFPMax64(EmitContext& ctx); + void EmitFPMin32(EmitContext& ctx); + void EmitFPMin64(EmitContext& ctx); + Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + void EmitFPNeg16(EmitContext& ctx); + void EmitFPNeg32(EmitContext& ctx); + void EmitFPNeg64(EmitContext& ctx); + void EmitFPRecip32(EmitContext& ctx); + void EmitFPRecip64(EmitContext& ctx); + void EmitFPRecipSqrt32(EmitContext& ctx); + void EmitFPRecipSqrt64(EmitContext& ctx); + void EmitFPSqrt(EmitContext& ctx); + void EmitFPSin(EmitContext& ctx); + void EmitFPSinNotReduced(EmitContext& ctx); + void EmitFPExp2(EmitContext& ctx); + void EmitFPExp2NotReduced(EmitContext& ctx); + void EmitFPCos(EmitContext& ctx); + void EmitFPCosNotReduced(EmitContext& ctx); + void EmitFPLog2(EmitContext& ctx); + void EmitFPSaturate16(EmitContext& ctx); + void EmitFPSaturate32(EmitContext& ctx); + void EmitFPSaturate64(EmitContext& ctx); + void EmitFPRoundEven16(EmitContext& ctx); + void EmitFPRoundEven32(EmitContext& ctx); + void EmitFPRoundEven64(EmitContext& ctx); + void EmitFPFloor16(EmitContext& 
ctx); + void EmitFPFloor32(EmitContext& ctx); + void EmitFPFloor64(EmitContext& ctx); + void EmitFPCeil16(EmitContext& ctx); + void EmitFPCeil32(EmitContext& ctx); + void EmitFPCeil64(EmitContext& ctx); + void EmitFPTrunc16(EmitContext& ctx); + void EmitFPTrunc32(EmitContext& ctx); + void EmitFPTrunc64(EmitContext& ctx); + Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + void EmitIAdd64(EmitContext& ctx); + Id EmitISub32(EmitContext& ctx, Id a, Id b); + void EmitISub64(EmitContext& ctx); + Id EmitIMul32(EmitContext& ctx, Id a, Id b); + void EmitINeg32(EmitContext& ctx); + void EmitIAbs32(EmitContext& ctx); + Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); + void EmitShiftRightLogical32(EmitContext& ctx); + void EmitShiftRightArithmetic32(EmitContext& ctx); + void EmitBitwiseAnd32(EmitContext& ctx); + void EmitBitwiseOr32(EmitContext& ctx); + void EmitBitwiseXor32(EmitContext& ctx); + void EmitBitFieldInsert(EmitContext& ctx); + void EmitBitFieldSExtract(EmitContext& ctx); + Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); + void EmitSLessThan(EmitContext& ctx); + void EmitULessThan(EmitContext& ctx); + void EmitIEqual(EmitContext& ctx); + void EmitSLessThanEqual(EmitContext& ctx); + void EmitULessThanEqual(EmitContext& ctx); + void EmitSGreaterThan(EmitContext& ctx); + void EmitUGreaterThan(EmitContext& ctx); + void EmitINotEqual(EmitContext& ctx); + void EmitSGreaterThanEqual(EmitContext& ctx); + Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); + void EmitLogicalOr(EmitContext& ctx); + void EmitLogicalAnd(EmitContext& ctx); + void EmitLogicalXor(EmitContext& ctx); + void EmitLogicalNot(EmitContext& ctx); + void EmitConvertS16F16(EmitContext& ctx); + void EmitConvertS16F32(EmitContext& ctx); + void EmitConvertS16F64(EmitContext& ctx); + void EmitConvertS32F16(EmitContext& ctx); + void EmitConvertS32F32(EmitContext& ctx); + void EmitConvertS32F64(EmitContext& ctx); + void EmitConvertS64F16(EmitContext& ctx); + void EmitConvertS64F32(EmitContext& ctx); + void EmitConvertS64F64(EmitContext& ctx); + void EmitConvertU16F16(EmitContext& ctx); + void EmitConvertU16F32(EmitContext& ctx); + void EmitConvertU16F64(EmitContext& ctx); + void EmitConvertU32F16(EmitContext& ctx); + void EmitConvertU32F32(EmitContext& ctx); + void EmitConvertU32F64(EmitContext& ctx); + void EmitConvertU64F16(EmitContext& ctx); + void EmitConvertU64F32(EmitContext& ctx); + void EmitConvertU64F64(EmitContext& ctx); + void EmitConvertU64U32(EmitContext& ctx); + void EmitConvertU32U64(EmitContext& ctx); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..447df5b8c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
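// [Editorial aside - not part of the patch] The implemented cases below lower
// to OpBitcast, which reinterprets the operand's bits without any numeric
// conversion. The host-side equivalent is C++20 std::bit_cast:
#include <bit>
#include <cstdint>

std::uint32_t BitsOfF32(float value) {
    // 1.0f yields 0x3F800000: identical bits, different type.
    return std::bit_cast<std::uint32_t>(value);
}
// [End aside]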
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.u32[1], value); +} + +void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.f32[1], value); +} + +void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackUint2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 000000000..b190cf876 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
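// [Editorial aside - not part of the patch] OpCompositeExtract takes literal
// (compile-time) indices, which is why EmitCompositeExtractU32x3 below
// receives the index as a plain u32 rather than as an Id. A host-side
// analogue with illustrative names:
#include <array>
#include <cstdint>

using U32x3 = std::array<std::uint32_t, 3>;

template <std::size_t Index>
std::uint32_t ExtractElement(const U32x3& vector) {
    static_assert(Index < 3, "extract index must be statically in range");
    return std::get<Index>(vector);
}
// [End aside]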
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { + return ctx.OpCompositeExtract(ctx.u32[1], vector, index); +} + +void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..b121305ea --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -0,0 +1,102 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
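// [Editorial aside - not part of the patch] EmitWorkgroupId and
// EmitLocalInvocationId at the end of this file declare their Input built-in
// variables lazily: a default-constructed Id (value == 0) means "not declared
// yet". The shared shape of that pattern, using only calls that already
// appear in this file (the helper name is illustrative):
Id LoadInputBuiltIn(EmitContext& ctx, Id& cache, spv::BuiltIn builtin) {
    if (cache.value == 0) {
        cache = ctx.AddGlobalVariable(
            ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]),
            spv::StorageClass::Input);
        ctx.Decorate(cache, spv::Decoration::BuiltIn, builtin);
    }
    return ctx.OpLoad(ctx.u32[3], cache);
}
// [End aside]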
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitGetRegister(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetRegister(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetPred(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetPred(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + if (!offset.IsImmediate()) { + throw NotImplementedException("Variable constant buffer offset"); + } + return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_cbuf"); +} + +void EmitSPIRV::EmitGetAttribute(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetAttribute(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { + if (ctx.workgroup_id.value == 0) { + ctx.workgroup_id = ctx.AddGlobalVariable( + ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); + ctx.Decorate(ctx.workgroup_id, spv::Decoration::BuiltIn, spv::BuiltIn::WorkgroupId); + } + return ctx.OpLoad(ctx.u32[3], ctx.workgroup_id); +} + +Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { + if (ctx.local_invocation_id.value == 0) { + ctx.local_invocation_id = ctx.AddGlobalVariable( + ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); + ctx.Decorate(ctx.local_invocation_id, spv::Decoration::BuiltIn, + spv::BuiltIn::LocalInvocationId); + } + return ctx.OpLoad(ctx.u32[3], ctx.local_invocation_id); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..770fe113c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
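// [Editorial aside - not part of the patch] Branch operands in the IR are
// block references rather than data values, so the emitters below resolve
// them with EmitContext::BlockLabel() (the OpLabel allocated per block up
// front) instead of EmitContext::Def(). Shape of a conditional branch, with
// illustrative local names:
void SketchCondBranch(EmitContext& ctx, IR::Inst* inst) {
    const Id condition{ctx.Def(inst->Arg(0))};
    const Id true_label{ctx.BlockLabel(inst->Arg(1).Label())};
    const Id false_label{ctx.BlockLabel(inst->Arg(2).Label())};
    ctx.OpBranchConditional(condition, true_label, false_label);
}
// [End aside]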
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label()));
+}
+
+void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) {
+    ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()),
+                            ctx.BlockLabel(inst->Arg(2).Label()));
+}
+
+void EmitSPIRV::EmitExit(EmitContext& ctx) {
+    ctx.OpReturn();
+}
+
+void EmitSPIRV::EmitReturn(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitUnreachable(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
new file mode 100644
index 000000000..9c39537e2
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -0,0 +1,220 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
+    const auto flags{inst->Flags<IR::FpControl>()};
+    if (flags.no_contraction) {
+        ctx.Decorate(op, spv::Decoration::NoContraction);
+    }
+    switch (flags.rounding) {
+    case IR::FpRounding::RN:
+        break;
+    case IR::FpRounding::RM:
+        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN);
+        break;
+    case IR::FpRounding::RP:
+        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP);
+        break;
+    case IR::FpRounding::RZ:
+        ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ);
+        break;
+    }
+    if (flags.fmz_mode != IR::FmzMode::FTZ) {
+        throw NotImplementedException("Denorm management not implemented");
+    }
+    return op;
+}
+
+} // Anonymous namespace
+
+void EmitSPIRV::EmitFPAbs16(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitFPAbs32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitFPAbs64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.f16[1], a, b));
+}
+
+Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.f32[1], a, b));
+}
+
+Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    return Decorate(ctx, inst, ctx.OpFAdd(ctx.f64[1], a, b));
+}
+
+Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+    return Decorate(ctx, inst, ctx.OpFma(ctx.f16[1], a, b, c));
+}
+
+Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+    return Decorate(ctx, inst, ctx.OpFma(ctx.f32[1], a, b, c));
+}
+
+Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
+    return Decorate(ctx, inst, ctx.OpFma(ctx.f64[1], a, b, c));
+}
+
+void EmitSPIRV::EmitFPMax32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitFPMax64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitFPMin32(EmitContext&) {
+    throw 
NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f16[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f32[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f64[1], a, b)); +} + +void EmitSPIRV::EmitFPNeg16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSqrt(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSin(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCos(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPLog2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc16(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3ef4f3d78 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -0,0 +1,132 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + if (inst->HasAssociatedPseudoOperation()) { + throw NotImplementedException("Pseudo-operations on IAdd32"); + } + return ctx.OpIAdd(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitISub64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitINeg32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitIAbs32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.u32[1], base, shift); +} + +void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { + return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); +} + +void EmitSPIRV::EmitSLessThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitULessThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitIEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitULessThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSGreaterThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUGreaterThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void 
EmitSPIRV::EmitINotEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.u1, lhs, rhs); +} + +void EmitSPIRV::EmitLogicalOr(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalAnd(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalXor(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalNot(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..7b43c4ed8 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitConvertS16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS16F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS16F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64U32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // 
namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..21a0d72fa --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Storage buffer indexing"); + } + return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf"); +} + +void EmitSPIRV::EmitLoadStorage64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorage128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx) 
{
+    ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf_store");
+}
+
+void EmitSPIRV::EmitWriteStorage64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitWriteStorage128(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
new file mode 100644
index 000000000..40a856f72
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -0,0 +1,25 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitSPIRV::EmitSelect8(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitSelect16(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitSelect32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitSelect64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
new file mode 100644
index 000000000..3850b072c
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -0,0 +1,29 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+
+void EmitSPIRV::EmitUndef1(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitUndef8(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitUndef16(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitUndef32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+void EmitSPIRV::EmitUndef64(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 9d7dc034c..ada0be834 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -130,27 +130,27 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
 }
 
 U32 IREmitter::WorkgroupIdX() {
-    return Inst<U32>(Opcode::WorkgroupIdX);
+    return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
 }
 
 U32 IREmitter::WorkgroupIdY() {
-    return Inst<U32>(Opcode::WorkgroupIdY);
+    return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
 }
 
 U32 IREmitter::WorkgroupIdZ() {
-    return Inst<U32>(Opcode::WorkgroupIdZ);
+    return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
 }
 
 U32 IREmitter::LocalInvocationIdX() {
-    return Inst<U32>(Opcode::LocalInvocationIdX);
+    return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
 }
 
 U32 IREmitter::LocalInvocationIdY() {
-    return Inst<U32>(Opcode::LocalInvocationIdY);
+    return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
 }
 
 U32 IREmitter::LocalInvocationIdZ() {
-    return Inst<U32>(Opcode::LocalInvocationIdZ);
+    return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
 }
 
 U32 IREmitter::LoadGlobalU8(const U64& address) {
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 82b04f37c..5dc65f2df 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -21,9 +21,9 @@ OPCODE(GetPred, U1, Pred
 OPCODE(SetPred, Void, Pred, U1, )
 OPCODE(GetCbuf, U32, U32, U32, )
 OPCODE(GetAttribute, U32, Attribute, )
-OPCODE(SetAttribute, U32, Attribute, )
+OPCODE(SetAttribute, Void, Attribute, U32, )
 OPCODE(GetAttributeIndexed, U32, U32, )
-OPCODE(SetAttributeIndexed, U32, U32, )
+OPCODE(SetAttributeIndexed, Void, U32, U32, )
 OPCODE(GetZFlag, U1, Void, )
 OPCODE(GetSFlag, U1, Void, )
 OPCODE(GetCFlag, U1, Void, )
@@ -32,12 +32,8 @@ OPCODE(SetZFlag, Void, U1,
 OPCODE(SetSFlag, Void, U1, )
 OPCODE(SetCFlag, Void, U1, )
 OPCODE(SetOFlag, Void, U1, )
-OPCODE(WorkgroupIdX, U32, )
-OPCODE(WorkgroupIdY, U32, )
-OPCODE(WorkgroupIdZ, U32, )
-OPCODE(LocalInvocationIdX, U32, )
-OPCODE(LocalInvocationIdY, U32, )
-OPCODE(LocalInvocationIdZ, U32, )
+OPCODE(WorkgroupId, U32x3, )
+OPCODE(LocalInvocationId, U32x3, )
 
 // Undefined
 OPCODE(Undef1, U1, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
index dcc3f6c0e..7e6bb07a2 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -11,15 +11,15 @@
 namespace Shader::Maxwell {
 
-template <auto visitor_method>
+template <auto method>
 static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
-    using MethodType = decltype(visitor_method);
+    using MethodType = decltype(method);
     if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
-        (visitor.*visitor_method)(pc, insn);
+        (visitor.*method)(pc, insn);
     } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
-        (visitor.*visitor_method)(insn);
+        (visitor.*method)(insn);
     } else {
-        (visitor.*visitor_method)();
+        (visitor.*method)();
     }
 }
 
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 39a972919..593efde39 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -13,7 +13,7 @@
 namespace Shader::Optimization {
 
 void IdentityRemovalPass(IR::Function& function) {
     std::vector<IR::Inst*> to_invalidate;
-    for (auto& block : function.blocks) {
+    for (IR::Block* const block : function.blocks) {
         for (auto inst = block->begin(); inst != block->end();) {
             const size_t num_args{inst->NumArgs()};
             for (size_t i = 0; i < num_args; ++i) {
diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp
index 19e36590c..9887e066d 100644
--- a/src/shader_recompiler/main.cpp
+++ b/src/shader_recompiler/main.cpp
@@ -6,6 +6,7 @@
 
 #include
 
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
 #include "shader_recompiler/file_environment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
@@ -51,18 +52,18 @@ void RunDatabase() {
 
 int main() {
     // RunDatabase();
 
-    // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
-    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"};
-    auto cfg{std::make_unique<Flow::CFG>(env, 0)};
-    // fmt::print(stdout, "{}\n", cfg->Dot());
-
     auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()};
     auto block_pool{std::make_unique<ObjectPool<IR::Block>>()};
-    for (int i = 0; i < 8192 * 4; ++i) {
-        void(inst_pool->Create(IR::Opcode::Void, 0));
+    // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
+    FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"};
+    for (int i = 0; i < 1; ++i) {
+        block_pool->ReleaseContents();
+        inst_pool->ReleaseContents();
+        auto cfg{std::make_unique<Flow::CFG>(env, 0)};
+        // fmt::print(stdout, "{}\n", cfg->Dot());
+        IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)};
+        // fmt::print(stdout, "{}\n", IR::DumpProgram(program));
+        Backend::SPIRV::EmitSPIRV spirv{program};
     }
-
-    IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)};
-    fmt::print(stdout, "{}\n", IR::DumpProgram(program));
 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-#include "video_core/vulkan_common/vulkan_device.h"
-
-namespace Vulkan {
-
-namespace {
-
-using Sirit::Id;
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-// TODO(Rodrigo): Use rasterizer's value
-constexpr u32 MaxConstBufferFloats = 0x4000;
-constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
-
-constexpr u32 NumInputPatches = 32; // This value seems to be the standard
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-class Expression final {
-public:
-    Expression(Id id_, Type type_) : id{id_}, type{type_} {
-        ASSERT(type_ != Type::Void);
-    }
-    Expression() : type{Type::Void} {}
-
-    Id id{};
-    Type type{};
-};
-static_assert(std::is_standard_layout_v<Expression>);
-
-struct TexelBuffer {
-    Id image_type{};
-    Id image{};
-};
-
-struct SampledImage {
-    Id image_type{};
-    Id sampler_type{};
-    Id sampler_pointer_type{};
-    Id variable{};
-};
-
-struct StorageImage {
-    Id image_type{};
-    Id image{};
-};
-
-struct AttributeType {
-    Type type;
-    Id scalar;
-    Id vector;
-};
-
-struct VertexIndices {
-    std::optional<u32> position;
-    std::optional<u32> layer;
-    std::optional<u32> viewport;
-    std::optional<u32> point_size;
-    std::optional<u32> clip_distances;
-};
-
-struct GenericVaryingDescription {
-    Id id = nullptr;
-    u32 first_element = 0;
-    bool is_scalar = false;
-};
-
-spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
-    ASSERT(!sampler.is_buffer);
-    switch (sampler.type) {
-    case Tegra::Shader::TextureType::Texture1D:
-        return spv::Dim::Dim1D;
-    case Tegra::Shader::TextureType::Texture2D:
-        return spv::Dim::Dim2D;
-    case Tegra::Shader::TextureType::Texture3D:
- 
return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case 
Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast(attribute) - static_cast(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative(meta)) { - return std::get(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - 
LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? 
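// Note: SPIR-V requires geometry shaders to declare an Invocations execution
// mode up front; with no obvious Maxwell register to query for it (see the
// TODO above), a single invocation is assumed below.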
- AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. 
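// Note: PreExit() may later rewrite depth as z = (z + w) * 0.5 for hosts that
// expect a [0, 1] clip range, so Position is seeded with a known default here
// to keep that read-modify-write from consuming uninitialized components.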
- const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector literals; - std::vector branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? 
next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast(std::size(frag_colors)); ++rt) { - if (!IsRenderTargetEnabled(rt)) { - 
continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if (header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. 
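// Note: SPIR-V's OpTypeArray requires a length of at least one, so a compute
// shader that requests no shared memory must skip the declaration entirely
// rather than emit a zero-sized array.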
- return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch (header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != 
ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. - varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast(static_cast(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast(tfb->offset)); - } - - element = static_cast(static_cast(element) + num_components); - } - } - - std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? 
t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? 
TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* name; - }; - std::vector members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast(members.size()); - members.push_back(BuiltIn{type, builtin, name}); - return index; - }; - - 
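// Note (illustrative): the struct is assembled member by member because each
// stage declares only the built-ins it actually uses. A fully populated
// result corresponds to GLSL's:
//     out gl_PerVertex {
//         vec4 gl_Position;
//         int gl_Layer;             // only when the shader writes it
//         int gl_ViewportIndex;     // only when the shader writes it
//         float gl_PointSize;
//         float gl_ClipDistance[8]; // only when clip distances are written
//     };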
VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast(index), member.name); - MemberDecorate(per_vertex_struct, static_cast(index), spv::Decoration::BuiltIn, - static_cast(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if 
(predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - const auto attribute = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. 
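// Note: attribute enablement comes from pipeline specialization
// (specialization.enabled_attributes), so the same shader recompiled against
// a different vertex layout folds these reads to a compile-time zero.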
- return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative(*offset)) { - // Indirect access - const Id offset_id = AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const Id flag = internal_flags.at(static_cast(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; - } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - if 
(device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. 
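// Note: Register::ZeroIndex models Maxwell's RZ (r255), which reads as zero
// and discards writes; instructions may still target it purely for side
// effects such as memory accesses, hence the unconditional Visit below.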
- Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast(static_cast(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if(&*dest)) { - target = {custom_variables.at(cv->GetIndex()), Type::Float}; - - } else { - UNIMPLEMENTED(); - } - - 
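// Note: target.id is left null when an optional built-in (layer or viewport
// index) was never declared for this stage/device combination; the check
// below silently drops such stores instead of emitting a broken OpStore.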
if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), 
Type::Float}; - } - - template - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if(&*dest)) { - target = internal_flags.at(static_cast(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? 
coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::vector coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair GetDerivatives(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - - std::vector operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - 
operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = 
spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get(operation.GetMeta())}; - std::vector colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - Atomic(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - 
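- // continue_label heads the decompiler's dispatch loop; on its next iteration it reads jmp_to and branches to the matching block label.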
OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. 
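- // For example, if RT0 writes RGBA and RT1 writes only RG, registers 0-3 feed RT0 and registers 4-5 feed RT1.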
- u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. 
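- // E.g. for thread 38 on a 64-wide warp: 38 >> 5 == 1, selecting the second 32-bit word of the ballot result.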
- const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(memory)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - template - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template - Id AccessElement(Id pointer_type, Id composite, Args... 
elements_) { - std::vector members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: - return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case 
Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - std::tuple CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair GetFlowStack(Operation operation) { - const auto stack_class = std::get(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpConvertSToF, 
Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - &SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - 
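- // The order of these entries is load-bearing: operations dispatch by indexing this table with their OperationCode value, as the static_assert below enforces.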
&SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? 
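- // The repeated block below implements the *WithNan comparison opcodes; until the TODO above is resolved they reuse the ordered variants.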
- &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier, - 
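- // MemoryBarrier is instantiated twice, once per spv::Scope (workgroup and device).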
&SPIRVDecompiler::MemoryBarrier, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; 
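- // (t_smem_uint is left default-constructed; it is only assigned when the shader actually declares shared memory.)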
- - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map registers; - std::map custom_variables; - std::map predicates; - std::map flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array internal_flags{}; - std::map input_attributes; - std::unordered_map output_attributes; - std::map constant_buffers; - std::map global_buffers; - std::map uniform_texels; - std::map sampled_images; - std::map images; - - std::array frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? 
decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) { - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else 
{ - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast<u32>(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector<ConstBufferEntry> const_buffers; - std::vector<GlobalBufferEntry> global_buffers; - std::vector<UniformTexelEntry> uniform_texels; - std::vector<SamplerEntry> samplers; - std::vector<StorageTexelEntry> storage_texels; - std::vector<ImageEntry> images; - std::set<u32> attributes; - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array<u32, 3> workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional<float> point_size; - std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; - std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. 
-// static_assert(std::is_trivially_copyable_v); - -struct SPIRVShader { - std::vector code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: [PATCH 012/770] shader: Initial implementation of an AST --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/spirv/emit_spirv.cpp | 45 +- .../backend/spirv/emit_spirv.h | 18 +- .../spirv/emit_spirv_context_get_set.cpp | 8 + .../backend/spirv/emit_spirv_control_flow.cpp | 25 - .../backend/spirv/emit_spirv_undefined.cpp | 16 +- .../frontend/ir/basic_block.cpp | 64 +- .../frontend/ir/basic_block.h | 40 +- .../frontend/ir/condition.cpp | 14 +- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/function.h | 2 +- .../frontend/ir/ir_emitter.cpp | 43 +- .../frontend/ir/ir_emitter.h | 23 +- .../frontend/ir/microinstruction.cpp | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 16 +- .../frontend/ir/structured_control_flow.cpp | 742 ++++++++++++++++++ .../frontend/ir/structured_control_flow.h | 22 + .../frontend/maxwell/control_flow.cpp | 424 ++++------ .../frontend/maxwell/control_flow.h | 77 +- .../frontend/maxwell/location.h | 12 +- .../frontend/maxwell/program.cpp | 69 +- .../frontend/maxwell/program.h | 2 +- .../frontend/maxwell/termination_code.cpp | 86 -- .../frontend/maxwell/termination_code.h | 17 - .../translate/impl/integer_shift_left.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 17 +- .../frontend/maxwell/translate/translate.h | 7 +- .../ir_opt/constant_propagation_pass.cpp | 50 ++ .../ir_opt/ssa_rewrite_pass.cpp | 24 +- .../ir_opt/verification_pass.cpp | 4 + src/shader_recompiler/main.cpp | 29 +- src/shader_recompiler/shader_info.h | 28 + 33 files changed, 1347 insertions(+), 591 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/shader_info.h diff --git a/externals/sirit b/externals/sirit index 1f7b70730..c374bfd9f 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35 +Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 12fbcb37c..27fc79e21 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -36,6 +36,8 @@ add_executable(shader_recompiler frontend/ir/program.cpp frontend/ir/program.h frontend/ir/reg.h + frontend/ir/structured_control_flow.cpp + frontend/ir/structured_control_flow.h frontend/ir/type.cpp frontend/ir/type.h frontend/ir/value.cpp @@ -51,8 +53,6 @@ add_executable(shader_recompiler frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h - frontend/maxwell/termination_code.cpp - frontend/maxwell/termination_code.h frontend/maxwell/translate/impl/common_encoding.h 
frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7c4269fad..5022b5159 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -105,8 +105,26 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->Opcode()); } -void EmitSPIRV::EmitPhi(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +static Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.u1; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector operands; + operands.reserve(num_args * 2); + for (size_t index = 0; index < num_args; ++index) { + IR::Block* const phi_block{inst->PhiBlock(index)}; + operands.push_back(ctx.Def(inst->Arg(index))); + operands.push_back(ctx.BlockLabel(phi_block)); + } + const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; + return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } void EmitSPIRV::EmitVoid(EmitContext&) {} @@ -115,6 +133,29 @@ void EmitSPIRV::EmitIdentity(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +// FIXME: Move to its own file +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), + ctx.BlockLabel(inst->Arg(2).Label())); +} + +void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()), + spv::LoopControlMask::MaskNone); +} + +void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone); +} + +void EmitSPIRV::EmitReturn(EmitContext& ctx) { + ctx.OpReturn(); +} + void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 3f4b68a7d..9aa83b5de 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -124,18 +124,20 @@ private: void EmitInst(EmitContext& ctx, IR::Inst* inst); // Microinstruction emitters - void EmitPhi(EmitContext& ctx); + Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); void EmitBranch(EmitContext& ctx, IR::Inst* inst); void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); - void EmitExit(EmitContext& ctx); + void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst); + void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst); void EmitReturn(EmitContext& ctx); - void EmitUnreachable(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); + void EmitSetGotoVariable(EmitContext& ctx); + void EmitGetGotoVariable(EmitContext& ctx); Id EmitGetCbuf(EmitContext& ctx, 
const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx); void EmitSetAttribute(EmitContext& ctx); @@ -151,11 +153,11 @@ private: void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); - void EmitUndef1(EmitContext& ctx); - void EmitUndef8(EmitContext& ctx); - void EmitUndef16(EmitContext& ctx); - void EmitUndef32(EmitContext& ctx); - void EmitUndef64(EmitContext& ctx); + Id EmitUndefU1(EmitContext& ctx); + void EmitUndefU8(EmitContext& ctx); + void EmitUndefU16(EmitContext& ctx); + void EmitUndefU32(EmitContext& ctx); + void EmitUndefU64(EmitContext& ctx); void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index b121305ea..1eab739ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -22,6 +22,14 @@ void EmitSPIRV::EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 770fe113c..66ce6c8c5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,28 +3,3 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" - -namespace Shader::Backend::SPIRV { - -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); -} - -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), - ctx.BlockLabel(inst->Arg(2).Label())); -} - -void EmitSPIRV::EmitExit(EmitContext& ctx) { - ctx.OpReturn(); -} - -void EmitSPIRV::EmitReturn(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSPIRV::EmitUnreachable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 3850b072c..859b60a95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitUndef1(EmitContext&) { +Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.u1); +} + +void EmitSPIRV::EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef8(EmitContext&) { +void EmitSPIRV::EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef16(EmitContext&) { +void EmitSPIRV::EmitUndefU32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSPIRV::EmitUndef64(EmitContext&) { +void EmitSPIRV::EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index da33ff6f1..b5616f394 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -17,6 +17,8 @@ namespace Shader::IR { Block::Block(ObjectPool& inst_pool_, u32 begin, u32 end) : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool& inst_pool_) : Block{inst_pool_, 0, 0} {} + Block::~Block() = default; void Block::AppendNewInst(Opcode op, std::initializer_list args) { @@ -38,8 +40,25 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { - imm_predecessors.push_back(immediate_predecessor); +void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { + branch_cond = cond; + branch_true = branch_true_; + branch_false = branch_false_; +} + +void Block::SetBranch(Block* branch) { + branch_cond = Condition{true}; + branch_true = branch; +} + +void Block::SetReturn() { + branch_cond = Condition{true}; + branch_true = nullptr; + branch_false = nullptr; +} + +bool Block::IsVirtual() const noexcept { + return location_begin == location_end; } u32 Block::LocationBegin() const noexcept { @@ -58,6 +77,12 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +void Block::AddImmediatePredecessor(Block* block) { + if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { + imm_predecessors.push_back(block); + } +} + std::span 
Block::ImmediatePredecessors() const noexcept { return imm_predecessors; } @@ -70,8 +95,17 @@ static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_i return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); } +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, - const std::map<const Inst*, size_t>& inst_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return "<null>"; } @@ -80,10 +114,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { - if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { - return fmt::format("%{}", it->second); - } - return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst())); + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { case Type::U1: @@ -125,14 +156,14 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& const Opcode op{inst.Opcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); if (TypeOf(op) != Type::Void) { - ret += fmt::format("%{:<5} = {}", inst_index, op); + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; + const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ?
", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_index, @@ -140,10 +171,12 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += arg_str; } - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { @@ -151,9 +184,6 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += '\n'; } - - inst_to_index.emplace(&inst, inst_index); - ++inst_index; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec3ad6263..3205705e7 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,7 +11,9 @@ #include +#include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -26,6 +28,7 @@ public: using const_reverse_iterator = InstructionList::const_reverse_iterator; explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); + explicit Block(ObjectPool& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -41,9 +44,15 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u64 flags = 0); - /// Adds a new immediate predecessor to the basic block. - void AddImmediatePredecessor(IR::Block* immediate_predecessor); + /// Set the branches to jump to when all instructions have executed. + void SetBranches(Condition cond, Block* branch_true, Block* branch_false); + /// Set the branch to unconditionally jump to when all instructions have executed. + void SetBranch(Block* branch); + /// Mark the block as a return block. + void SetReturn(); + /// Returns true when the block does not implement any guest instructions directly. + [[nodiscard]] bool IsVirtual() const noexcept; /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; /// Gets the end location for this basic block. @@ -54,8 +63,23 @@ public: /// Gets an immutable reference to the instruction list for this basic block. [[nodiscard]] const InstructionList& Instructions() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. 
- [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept; + + [[nodiscard]] Condition BranchCondition() const noexcept { + return branch_cond; + } + [[nodiscard]] bool IsTerminationBlock() const noexcept { + return !branch_true && !branch_false; + } + [[nodiscard]] Block* TrueBranch() const noexcept { + return branch_true; + } + [[nodiscard]] Block* FalseBranch() const noexcept { + return branch_false; + } [[nodiscard]] bool empty() const { return instructions.empty(); @@ -129,10 +153,18 @@ private: /// List of instructions in this block InstructionList instructions; + /// Condition to choose the branch to take + Condition branch_cond{true}; + /// Block to jump into when the branch condition evaluates as true + Block* branch_true{nullptr}; + /// Block to jump into when the branch condition evaluates as false + Block* branch_false{nullptr}; /// Block immediate predecessors - std::vector<IR::Block*> imm_predecessors; + std::vector<Block*> imm_predecessors; }; +using BlockList = std::vector<Block*>; + [[nodiscard]] std::string DumpBlock(const Block& block); [[nodiscard]] std::string DumpBlock(const Block& block, diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index edff35dc7..ec1659e2b 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -16,15 +16,13 @@ std::string NameOf(Condition condition) { ret = fmt::to_string(condition.FlowTest()); } const auto [pred, negated]{condition.Pred()}; - if (pred != Pred::PT || negated) { - if (!ret.empty()) { - ret += '&'; - } - if (negated) { - ret += '!'; - } - ret += fmt::to_string(pred); + if (!ret.empty()) { + ret += '&'; } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); return ret; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 52737025c..16b4ae888 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -26,7 +26,7 @@ public: explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept : Condition(FlowTest::T, pred_, pred_negated_) {} - Condition(bool value) : Condition(Pred::PT, !value) {} + explicit Condition(bool value) : Condition(Pred::PT, !value) {} auto operator<=>(const Condition&) const noexcept = default; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index bba7d1d39..fd7d56419 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -11,7 +11,7 @@ namespace Shader::IR { struct Function { - boost::container::small_vector blocks; + BlockList blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ada0be834..30932043f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -44,26 +44,29 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(IR::Block* label) { +void IREmitter::Branch(Block* label) { + label->AddImmediatePredecessor(block); Inst(Opcode::Branch, label); } -void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) { - Inst(Opcode::BranchConditional, cond, true_label, false_label); +void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block*
false_label) { + true_label->AddImmediatePredecessor(block); + false_label->AddImmediatePredecessor(block); + Inst(Opcode::BranchConditional, condition, true_label, false_label); } -void IREmitter::Exit() { - Inst(Opcode::Exit); +void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { + Inst(Opcode::LoopMerge, merge_block, continue_target); +} + +void IREmitter::SelectionMerge(Block* merge_block) { + Inst(Opcode::SelectionMerge, merge_block); } void IREmitter::Return() { Inst(Opcode::Return); } -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); -} - U32 IREmitter::GetReg(IR::Reg reg) { return Inst<U32>(Opcode::GetRegister, reg); } @@ -81,6 +84,14 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst<U1>(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } @@ -121,6 +132,20 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::Condition(IR::Condition cond) { + if (cond == IR::Condition{true}) { + return Imm1(true); + } else if (cond == IR::Condition{false}) { + return Imm1(false); + } + const FlowTest flow_test{cond.FlowTest()}; + const auto [pred, is_negated]{cond.Pred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + throw NotImplementedException("Condition {}", cond); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst<F32>(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bfd9916cc..4decb46bc 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -16,11 +16,11 @@ namespace Shader::IR { class IREmitter { public: - explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) - : block{block_}, insertion_point{insertion_point_} {} + : block{&block_}, insertion_point{insertion_point_} {} - Block& block; + Block* block; [[nodiscard]] U1 Imm1(bool value) const; [[nodiscard]] U8 Imm8(u8 value) const; @@ -31,11 +31,11 @@ public: [[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(IR::Block* label); - void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); - void Exit(); + void Branch(Block* label); + void BranchConditional(const U1& condition, Block* true_label, Block* false_label); + void LoopMerge(Block* merge_block, Block* continue_target); + void SelectionMerge(Block* merge_block); void Return(); - void Unreachable(); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -43,6 +43,9 @@ [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); void SetPred(IR::Pred pred, const U1& value); + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); @@ -55,6 +58,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void
SetAttribute(IR::Attribute attribute, const F32& value); @@ -168,7 +173,7 @@ private: template <typename T = Value, typename... Args> T Inst(Opcode op, Args... args) { - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } @@ -184,7 +189,7 @@ private: T Inst(Opcode op, Flags flags, Args... args) { u64 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e7ca92039..b4ae371bd 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -51,9 +51,9 @@ bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: case Opcode::BranchConditional: - case Opcode::Exit: + case Opcode::LoopMerge: + case Opcode::SelectionMerge: case Opcode::Return: - case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5dc65f2df..ede5e20c2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -10,15 +10,17 @@ OPCODE(Identity, Opaque, Opaq // Control flow OPCODE(Branch, Void, Label, ) OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) OPCODE(GetPred, U1, Pred, ) OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) @@ -36,11 +38,11 @@ OPCODE(WorkgroupId, U32x3, OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations OPCODE(LoadGlobalU8, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp new file mode 100644 index 000000000..2e9ce2525 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -0,0 +1,742 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
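+// A rough orientation for the two passes below (a summary sketch, not an
+// exhaustive description): GotoPass lowers the unordered block list into a
+// statement tree where every branch becomes a conditional goto and every
+// block entry a label, then removes each goto with outward-movement,
+// inward-movement and lifting transformations, in the spirit of Erosa and
+// Hendren's goto elimination. Once a goto and its label are siblings, two
+// rules finish the job:
+//
+//     if (c) goto L;            if (!c) {
+//     A;                  =>        A;
+//     L: B;                     }
+//                               B;
+//
+//     L: A;                     do {
+//     if (c) goto L;      =>        A;
+//     B;                        } while (c);
+//                               B;
+//
+// TranslatePass then walks the structured tree and re-emits IR basic blocks,
+// using SelectionMerge/LoopMerge so the output is already structured.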
+ +#include <algorithm> +#include <memory> +#include <ranges> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <boost/intrusive/list.hpp> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + Block* code; + Node label; + Tree children; + Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default:
return ""; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + 
return false; +} + +class GotoPass { +public: + explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement>& stmt_pool) + : pool{stmt_pool} { + std::vector<Node> gotos{BuildUnorderedTreeGetGotos(blocks)}; + fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildUnorderedTreeGetGotos(std::span<Block* const> blocks) { + // Assume all blocks have two branches + std::vector<Node> gotos; + gotos.reserve(blocks.size() * 2); + + const std::unordered_map<const Block*, Node> labels_map{BuildLabels(blocks)}; + Tree& root{root_stmt.children}; + auto insert_point{root.begin()}; + for (Block* const block : blocks) { + ++insert_point; // Skip label + ++insert_point; // Skip set variable + root.insert(insert_point, *pool.Create(block, &root_stmt)); + + if (block->IsTerminationBlock()) { + root.insert(insert_point, *pool.Create(Return{})); + continue; + } + const Condition cond{block->BranchCondition()}; + Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; + if (cond == Condition{true} || cond == Condition{false}) { + const bool is_true{cond == Condition{true}}; + const Block* const branch{is_true ?
block->TrueBranch() : block->FalseBranch()}; + const Node label{labels_map.at(branch)}; + Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_stmt)); + } else { + Statement* const ident_cond{pool.Create(Identity{}, cond)}; + const Node true_label{labels_map.at(block->TrueBranch())}; + const Node false_label{labels_map.at(block->FalseBranch())}; + Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; + Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_true)); + gotos.push_back(root.insert(insert_point, *goto_false)); + } + } + return gotos; + } + + std::unordered_map BuildLabels(std::span blocks) { + // TODO: Consider storing labels intrusively inside the block + std::unordered_map labels_map; + Tree& root{root_stmt.children}; + u32 label_id{0}; + for (const Block* const block : blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + labels_map.emplace(block, root.insert(root.end(), *label)); + Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; + root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + ++label_id; + } + return labels_map; + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + // Update nested if condition + switch (label_nested_stmt->type) { + case StatementType::If: + label_nested_stmt->cond = pool.Create(Or{}, neg_var, 
label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool& pool; + Statement 
root_stmt{FunctionTag{}}; +}; + +Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<Inst>& inst_pool_, ObjectPool<Block>& block_pool_, + ObjectPool<Statement>& stmt_pool_, Statement& root_stmt, + const std::function<void(IR::Block*)>& func_, BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, Block* continue_block, Block* break_block) { + Tree& tree{parent.children}; + Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IREmitter ir{*current_block}; + ir.Branch(stmt.code); + } + current_block = stmt.code; + func(stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + Block* const first_if_block{block_list.at(first_block_index)}; + IREmitter ir{*current_block}; + const U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + Block* const new_continue_block{block_pool.Create(inst_pool)}; + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + Block* const first_loop_block{block_list.at(first_block_index)}; + IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IREmitter continue_ir{*new_continue_block}; + const U1
continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const skip_block{MergeBlock(parent, stmt)}; + + IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IREmitter ir{*current_block}; + ir.Branch(continue_block); + } + } + + Block* MergeBlock(Statement& parent, Statement& stmt) { + if (Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool<Statement>& stmt_pool; + ObjectPool<Inst>& inst_pool; + ObjectPool<Block>& block_pool; + const std::function<void(IR::Block*)>& func; + BlockList& block_list; +}; +} // Anonymous namespace + +BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(IR::Block*)>& func) { + ObjectPool<Statement> stmt_pool; + GotoPass goto_pass{unordered_blocks, stmt_pool}; + BlockList block_list; + TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), + func, block_list}; + return block_list; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h new file mode 100644 index 000000000..a574c24f7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
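+// Usage sketch (illustrative: "env", "Translate" and "unordered_blocks" are
+// assumed caller-side names, not declared in this header):
+//
+//     ObjectPool<IR::Inst> inst_pool;
+//     ObjectPool<IR::Block> block_pool;
+//     const IR::BlockList blocks{IR::VisitAST(
+//         inst_pool, block_pool, unordered_blocks,
+//         [&](IR::Block* block) { Translate(env, block); })};
+//
+// The callback fires once per guest code block as it is laid out in final
+// program order, which is where instruction translation can hook in; merge
+// blocks created internally by the pass are emitted without invoking it.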
+ +#pragma once + +#include <functional> +#include <span> + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +[[nodiscard]] BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(IR::Block*)>& func); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 21ee98137..e766b555b 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -17,38 +17,49 @@ #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; +} // Anonymous namespace static u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { - if (pc <= block.begin || pc >= block.end) { +static void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - return { - Block{ - .begin{block.begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{block.id}, - .stack{block.stack}, - .cond{true}, - .branch_true{new_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }, - Block{ - .begin{pc}, - .end{block.end}, - .end_class{block.end_class}, - .id{new_id}, - .stack{std::move(block.stack)}, - .cond{block.cond}, - .branch_true{block.branch_true}, - .branch_false{block.branch_false}, - .imm_predecessors{}, - }, + *new_block = Block{ + .begin{pc}, + .end{old_block->end}, + .end_class{old_block->end_class}, + .stack{old_block->stack}, + .cond{old_block->cond}, + .branch_true{old_block->branch_true}, + .branch_false{old_block->branch_false}, + .ir{nullptr}, + }; + *old_block = Block{ + .begin{old_block->begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{std::move(old_block->stack)}, + .cond{IR::Condition{true}}, + .branch_true{new_block}, + .branch_false{nullptr}, + .ir{nullptr}, }; } @@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) { static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { - return fmt::format("\"Virtual {}\"", block.id); + return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } @@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept { Function::Function(Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block_id{0}, + .block{nullptr}, .stack{}, }} {} -void Function::BuildBlocksMap() { - const size_t num_blocks{NumBlocks()}; - blocks_map.resize(num_blocks); - for (size_t block_index = 0; block_index < num_blocks; ++block_index) { - Block& block{blocks_data[block_index]}; - blocks_map[block.id] = &block; - } -} - -void Function::BuildImmediatePredecessors() { - for (const Block& block : blocks_data) { - if (block.branch_true != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); - }
if (block.branch_false != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); - } - } -} - -void Function::BuildPostOrder() { - boost::container::small_vector block_stack; - post_order_map.resize(NumBlocks()); - - Block& first_block{blocks_data[blocks.front()]}; - first_block.post_order_visited = true; - block_stack.push_back(first_block.id); - - const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { - if (branch_id == UNREACHABLE_BLOCK_ID) { - return false; - } - if (blocks_map[branch_id]->post_order_visited) { - return false; - } - blocks_map[branch_id]->post_order_visited = true; - - // Calling push_back twice is faster than insert on msvc - block_stack.push_back(block_id); - block_stack.push_back(branch_id); - return true; - }; - while (!block_stack.empty()) { - const Block* const block{blocks_map[block_stack.back()]}; - block_stack.pop_back(); - - if (!visit_branch(block->id, block->branch_true) && - !visit_branch(block->id, block->branch_false)) { - post_order_map[block->id] = static_cast(post_order_blocks.size()); - post_order_blocks.push_back(block->id); - } - } -} - -void Function::BuildImmediateDominators() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; - auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; - auto intersect{[this](Block* finger1, Block* finger2) { - while (finger1 != finger2) { - while (post_order_map[finger1->id] < post_order_map[finger2->id]) { - finger1 = finger1->imm_dominator; - } - while (post_order_map[finger2->id] < post_order_map[finger1->id]) { - finger2 = finger2->imm_dominator; - } - } - return finger1; - }}; - for (Block& block : blocks_data) { - block.imm_dominator = nullptr; - } - Block* const start_block{&blocks_data[blocks.front()]}; - start_block->imm_dominator = start_block; - - bool changed{true}; - while (changed) { - changed = false; - for (Block* const block : post_order_blocks | reverse_order_but_first) { - Block* new_idom{}; - for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { - new_idom = new_idom ? 
intersect(predecessor, new_idom) : predecessor; - } - changed |= block->imm_dominator != new_idom; - block->imm_dominator = new_idom; - } - } -} - -void Function::BuildDominanceFrontier() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; - for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { - for (Block* current : block.imm_predecessors | transform_block_id) { - while (current != block.imm_dominator) { - current->dominance_frontiers.push_back(current->id); - current = current->imm_dominator; - } - } - } -} - -CFG::CFG(Environment& env_, Location start_address) : env{env_} { - VisitFunctions(start_address); - - for (Function& function : functions) { - function.BuildBlocksMap(); - function.BuildImmediatePredecessors(); - function.BuildPostOrder(); - function.BuildImmediateDominators(); - function.BuildDominanceFrontier(); - } -} - -void CFG::VisitFunctions(Location start_address) { +CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) + : env{env_}, block_pool{block_pool_} { functions.emplace_back(start_address); + functions.back().labels.back().block = block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + }); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { return; } // Try to find the next block - Function* function{&functions[function_id]}; + Function* const function{&functions[function_id]}; Location pc{label.address}; - const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, - [function](Location pc, u32 block_index) { - return pc < function->blocks_data[block_index].begin; - })}; - const auto next_index{std::distance(function->blocks.begin(), next)}; - const bool is_last{next == function->blocks.end()}; - Location next_pc; - BlockId next_id{UNREACHABLE_BLOCK_ID}; - if (!is_last) { - next_pc = function->blocks_data[*next].begin; - next_id = function->blocks_data[*next].id; - } + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? 
nullptr : &*next_it}; // Insert before the next block - Block block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{label.block_id}, - .stack{std::move(label.stack)}, - .cond{true}, - .branch_true{UNREACHABLE_BLOCK_ID}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }; + Block* const block{label.block}; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; - while (is_last || pc < next_pc) { + while (!next || pc < next->begin) { is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; if (is_branch) { break; @@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { if (!is_branch) { // If the block finished without a branch, // it means that the next instruction is already visited, jump to it - block.end = pc; - block.cond = true; - block.branch_true = next_id; - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; } // Function's pointer might be invalid, resolve it again - function = &functions[function_id]; - const u32 new_block_index = static_cast(function->blocks_data.size()); - function->blocks.insert(function->blocks.begin() + next_index, new_block_index); - function->blocks_data.push_back(std::move(block)); + // Insert the new block + functions[function_id].blocks.insert(*block); } bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { const Location pc{label.address}; Function& function{functions[function_id]}; - const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { - return function.blocks_data[block_index].Contains(pc); - })}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; if (it == function.blocks.end()) { // Address has not been visited return false; } - Block& block{function.blocks_data[*it]}; - if (block.begin == pc) { - throw LogicError("Dangling branch"); + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); } - const u32 first_index{*it}; - const u32 second_index{static_cast(function.blocks_data.size())}; - const std::array new_indices{first_index, second_index}; - std::array split_blocks{Split(std::move(block), pc, label.block_id)}; - function.blocks_data[*it] = std::move(split_blocks[0]); - function.blocks_data.push_back(std::move(split_blocks[1])); - function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); return true; } -CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { const Instruction inst{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(inst.raw)}; switch (opcode) { @@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); break; case Opcode::RET: - block.end_class = EndClass::Return; + block->end_class = EndClass::Return; break; default: break; } - block.end = pc; + block->end = pc; return AnalysisState::Branch; case Opcode::BRK: case Opcode::CONT: @@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati if 
(!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; } - const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; - block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); - block.end = pc; + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; return AnalysisState::Branch; } case Opcode::PBK: @@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati case Opcode::PEXIT: case Opcode::PLONGJMP: case Opcode::SSY: - block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); @@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati return AnalysisState::Branch; } -void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond) { - if (block.begin != pc) { + if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later - block.end = pc; - block.cond = true; - block.branch_true = AddLabel(block, block.stack, pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; return; } - // Impersonate the visited block with a virtual block - // Jump from this virtual to the real conditional instruction and the next instruction - Function& function{functions[function_id]}; - const BlockId conditional_block_id{++function.current_block_id}; - function.blocks.push_back(static_cast(function.blocks_data.size())); - Block& virtual_block{function.blocks_data.emplace_back(Block{ - .begin{}, // Virtual block - .end{}, + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{ + .begin{block->begin.Virtual()}, + .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .id{block.id}, // Impersonating - .stack{block.stack}, + .stack{block->stack}, .cond{cond}, - .branch_true{conditional_block_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - })}; - // Set the end properties of the conditional instruction and give it a new identity - Block& conditional_block{block}; - conditional_block.end = pc; - conditional_block.end_class = insn_end_class; - conditional_block.id = conditional_block_id; + .branch_true{conditional_block}, + .branch_false{nullptr}, + .ir{nullptr}, + }; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc; + conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction - const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block - 
virtual_block.branch_false = endif_block_id; + block->branch_false = endif_block; // And branch to it from the conditional instruction if it is a branch if (insn_end_class == EndClass::Branch) { - conditional_block.cond = true; - conditional_block.branch_true = endif_block_id; - conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); } -bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode) { if (inst.branch.is_cbuf) { throw NotImplementedException("Branch with constant buffer offset"); @@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr const bool has_flow_test{HasFlowTest(opcode)}; const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - block.cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); - block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + block->cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); } else { - block.cond = true; + block->cond = IR::Condition{true}; } return true; } -void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute) { const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { +void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? 
"JMX" : "BRX"); } @@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { } } -CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; const Predicate pred{inst.Pred()}; @@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati return AnalysisState::Continue; } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - if (block.stack.Peek(Token::PEXIT).has_value()) { + if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } - if (const std::optional exit_pc{block.stack.Peek(Token::PEXIT)}) { - const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; - block.cond = true; - block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; return AnalysisState::Branch; } - block.end = pc; - block.end_class = EndClass::Exit; + block->end = pc; + block->end_class = EndClass::Exit; return AnalysisState::Branch; } -BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { Function& function{functions[function_id]}; - if (block.begin == pc) { - return block.id; + if (block->begin == pc) { + // Jumps to itself + return block; } - const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; - if (target != function.blocks_data.end()) { - return target->id; + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + return &*it; } - const BlockId block_id{++function.current_block_id}; + // TODO: FIX DANGLING BLOCKS + Block* const new_block{block_pool.Create(Block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{stack}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + })}; function.labels.push_back(Label{ .address{pc}, - .block_id{block_id}, + .block{new_block}, .stack{std::move(stack)}, }); - return block_id; + return new_block; } std::string CFG::Dot() const { @@ -581,18 +474,12 @@ std::string CFG::Dot() const { for (const Function& function : functions) { dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); dot += fmt::format("\t\tnode [style=filled];\n"); - for (const u32 block_index : function.blocks) { - const Block& block{function.blocks_data[block_index]}; + for (const Block& block : function.blocks) { const std::string name{NameOf(block)}; - const auto add_branch = [&](BlockId branch_id, bool add_label) { - const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; - dot += fmt::format("\t\t{}->", name); - if (it == function.blocks_data.end()) { - dot += fmt::format("\"Unknown label {}\"", branch_id); - } else { - dot += NameOf(*it); 
- }; - if (add_label && block.cond != true && block.cond != false) { + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { dot += fmt::format(" [label=\"{}\"]", block.cond); } dot += '\n'; @@ -600,10 +487,10 @@ std::string CFG::Dot() const { dot += fmt::format("\t\t{};\n", name); switch (block.end_class) { case EndClass::Branch: - if (block.cond != false) { + if (block.cond != IR::Condition{false}) { add_branch(block.branch_true, true); } - if (block.cond != true) { + if (block.cond != IR::Condition{true}) { add_branch(block.branch_false, false); } break; @@ -619,12 +506,6 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; - case EndClass::Unreachable: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += fmt::format( - "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } if (function.entrypoint == 8) { @@ -635,10 +516,11 @@ std::string CFG::Dot() const { dot += "\t}\n"; } if (!functions.empty()) { - if (functions.front().blocks.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 49b369282..8179787b8 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -11,25 +11,27 @@ #include #include +#include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +class Block; +} namespace Shader::Maxwell::Flow { -using BlockId = u32; using FunctionId = size_t; -constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; - enum class EndClass { Branch, Exit, Return, - Unreachable, }; enum class Token { @@ -59,58 +61,37 @@ private: boost::container::small_vector entries; }; -struct Block { +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode> { [[nodiscard]] bool Contains(Location pc) const noexcept; + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + Location begin; Location end; EndClass end_class; - BlockId id; Stack stack; IR::Condition cond; - BlockId branch_true; - BlockId branch_false; - boost::container::small_vector imm_predecessors; - boost::container::small_vector dominance_frontiers; - union { - bool post_order_visited{false}; - Block* imm_dominator; - }; + Block* branch_true; + Block* branch_false; + IR::Block* ir; }; struct Label { Location address; - BlockId block_id; + Block* block; Stack stack; }; struct Function { Function(Location start_address); - void BuildBlocksMap(); - - void BuildImmediatePredecessors(); - - void BuildPostOrder(); - - void BuildImmediateDominators(); - - void BuildDominanceFrontier(); - - [[nodiscard]] size_t NumBlocks() const noexcept { - return 
static_cast(current_block_id) + 1; - } - Location entrypoint; - BlockId current_block_id{0}; boost::container::small_vector labels; - boost::container::small_vector blocks; - boost::container::small_vector blocks_data; - // Translates from BlockId to block index - boost::container::small_vector blocks_map; - - boost::container::small_vector post_order_blocks; - boost::container::small_vector post_order_map; + boost::intrusive::set blocks; }; class CFG { @@ -120,7 +101,7 @@ class CFG { }; public: - explicit CFG(Environment& env, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -133,35 +114,37 @@ public: [[nodiscard]] std::span Functions() const noexcept { return std::span(functions.data(), functions.size()); } + [[nodiscard]] std::span Functions() noexcept { + return std::span(functions.data(), functions.size()); + } private: - void VisitFunctions(Location start_address); - void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); - void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode); - void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); - AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); Environment& env; + ObjectPool& block_pool; boost::container::small_vector functions; FunctionId current_function_id{0}; }; diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h index 66b51a19e..26d29eae2 100644 --- a/src/shader_recompiler/frontend/maxwell/location.h +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -15,7 +15,7 @@ namespace Shader::Maxwell { class Location { - static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits::max()}; + static constexpr u32 VIRTUAL_BIAS{4}; public: constexpr Location() = default; @@ -27,12 +27,18 @@ public: Align(); } + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + [[nodiscard]] constexpr u32 Offset() const noexcept 
{ return offset; } [[nodiscard]] constexpr bool IsVirtual() const { - return offset == VIRTUAL_OFFSET; + return offset % 8 == VIRTUAL_BIAS; } constexpr auto operator<=>(const Location&) const noexcept = default; @@ -89,7 +95,7 @@ private: offset -= 8 + (offset % 32 == 8 ? 8 : 0); } - u32 offset{VIRTUAL_OFFSET}; + u32 offset{0xcccccccc}; }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8cdd20804..9fa912ed8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,57 +4,58 @@ #include #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { namespace { -void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map) { +IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::Function& cfg_function) { const size_t num_blocks{cfg_function.blocks.size()}; - function.blocks.reserve(num_blocks); - - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; - block_map[flow_block.id] = ir_block; - function.blocks.emplace_back(ir_block); - } -} - -void EmitTerminationInsts(const Flow::Function& cfg_function, - std::span block_map) { - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } -} - -void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, - IR::Function& function) { - std::vector block_map; - block_map.resize(cfg_function.blocks_data.size()); - - TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); - EmitTerminationInsts(cfg_function, block_map); + std::vector blocks(cfg_function.blocks.size()); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + const u32 begin{cfg_block.begin.Offset()}; + const u32 end{cfg_block.end.Offset()}; + blocks[i] = block_pool.Create(inst_pool, begin, end); + cfg_block.ir = blocks[i]; + ++i; + }); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + IR::Block* const block{blocks[i]}; + ++i; + if (cfg_block.end_class != Flow::EndClass::Branch) { + block->SetReturn(); + } else if (cfg_block.cond == IR::Condition{true}) { + block->SetBranch(cfg_block.branch_true->ir); + } else if (cfg_block.cond == IR::Condition{false}) { + block->SetBranch(cfg_block.branch_false->ir); + } else { + block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, + cfg_block.branch_false->ir); + } + }); + return IR::VisitAST(inst_pool, block_pool, blocks, + [&](IR::Block* block) { Translate(env, block); }); } } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::CFG& cfg) { + Environment& env, 
Flow::CFG& cfg) { IR::Program program; auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); + for (Flow::Function& cfg_function : cfg.Functions()) { + functions.push_back(IR::Function{ + .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, + }); } + + fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 3355ab129..542621a1d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -19,6 +19,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - const Flow::CFG& cfg); + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp deleted file mode 100644 index ed5137f20..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" - -namespace Shader::Maxwell { - -static void EmitExit(IR::IREmitter& ir) { - ir.Exit(); -} - -static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { - switch (flow_test) { - case IR::FlowTest::T: - return ir.Imm1(true); - case IR::FlowTest::F: - return ir.Imm1(false); - case IR::FlowTest::NE: - // FIXME: Verify this - return ir.LogicalNot(ir.GetZFlag()); - case IR::FlowTest::NaN: - // FIXME: Verify this - return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); - default: - throw NotImplementedException("Flow test {}", flow_test); - } -} - -static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { - const IR::FlowTest flow_test{cond.FlowTest()}; - const auto [pred, pred_negated]{cond.Pred()}; - if (pred == IR::Pred::PT && !pred_negated) { - return GetFlowTest(flow_test, ir); - } - if (flow_test == IR::FlowTest::T) { - return ir.GetPred(pred, pred_negated); - } - return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); -} - -static void EmitBranch(const Flow::Block& flow_block, std::span block_map, - IR::IREmitter& ir) { - const auto add_immediate_predecessor = [&](Flow::BlockId label) { - block_map[label]->AddImmediatePredecessor(&ir.block); - }; - if (flow_block.cond == true) { - add_immediate_predecessor(flow_block.branch_true); - return ir.Branch(block_map[flow_block.branch_true]); - } - if (flow_block.cond == false) { - add_immediate_predecessor(flow_block.branch_false); - return ir.Branch(block_map[flow_block.branch_false]); - } - add_immediate_predecessor(flow_block.branch_true); - add_immediate_predecessor(flow_block.branch_false); - return 
ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], - block_map[flow_block.branch_false]); -} - -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map) { - IR::Block* const block{block_map[flow_block.id]}; - IR::IREmitter ir(*block); - switch (flow_block.end_class) { - case Flow::EndClass::Branch: - EmitBranch(flow_block, block_map, ir); - break; - case Flow::EndClass::Exit: - EmitExit(ir); - break; - case Flow::EndClass::Return: - ir.Return(); - break; - case Flow::EndClass::Unreachable: - ir.Unreachable(); - break; - } -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h deleted file mode 100644 index 04e044534..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" - -namespace Shader::Maxwell { - -/// Emit termination instructions and collect immediate predecessors -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d4b417d14..b752785d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { IR::U32 result; if (shl.w != 0) { // When .W is set, the shift value is wrapped - // To emulate this we just have to clamp it ourselves. + // To emulate this we just have to wrap it ourselves. 
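// A worked example (operand values invented for illustration): with .W set, an
// unsafe shift amount of 33 wraps to 33 & 31 = 1 and the result is base << 1,
// where clamping would instead have shifted by 31.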
const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; result = v.ir.ShiftLeftLogical(base, shift); } else { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 7e6bb07a2..f1230f58f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block) { - IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; - TranslatorVisitor visitor{env, block}; - - const Location pc_end{flow_block.end}; - Location pc{flow_block.begin}; - while (pc != pc_end) { +void Translate(Environment& env, IR::Block* block) { + if (block->IsVirtual()) { + return; + } + TranslatorVisitor visitor{env, *block}; + const Location pc_end{block->LocationEnd()}; + for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { @@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool& inst_pool, Environment& env, default: throw LogicError("Invalid opcode {}", opcode); } - ++pc; } - return block; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index c1c21b278..e1aa2e0f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,14 +6,9 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block); +void Translate(Environment& env, IR::Block* block); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1170c61e..9fba6ac23 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -132,6 +132,32 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->Opcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; @@ -160,6 +186,24 @@ void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); } +void FoldBranchConditional(IR::Inst& inst) { + const IR::U1 
cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + // TODO: Convert to Branch + return; + } + const IR::Inst* cond_inst{cond.InstRecursive()}; + if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + const IR::Value true_label{inst.Arg(1)}; + const IR::Value false_label{inst.Arg(2)}; + // Remove negation on the conditional (take the parameter out of LogicalNot) and swap + // the branches + inst.SetArg(0, cond_inst->Arg(0)); + inst.SetArg(1, false_label); + inst.SetArg(2, true_label); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -178,6 +222,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: @@ -188,6 +236,8 @@ void ConstantPropagation(IR::Inst& inst) { } return (base >> shift) & ((1U << count) - 1); }); + case IR::Opcode::BranchConditional: + return FoldBranchConditional(inst); default: break; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 15a9db90a..8ca996e93 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -34,6 +34,13 @@ struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + u32 index; +}; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -43,6 +50,10 @@ struct DefTable { return preds[IR::PredIndex(variable)]; } + [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { + return goto_vars[goto_variable.index]; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -61,6 +72,7 @@ struct DefTable { std::array regs; std::array preds; + boost::container::flat_map goto_vars; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -68,15 +80,15 @@ struct DefTable { }; IR::Opcode UndefOpcode(IR::Reg) noexcept { - return IR::Opcode::Undef32; + return IR::Opcode::UndefU32; } IR::Opcode UndefOpcode(IR::Pred) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } IR::Opcode UndefOpcode(const FlagTag&) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { @@ -165,6 +177,9 @@ void SsaRewritePass(IR::Function& function) { pass.WriteVariable(pred, block, inst.Arg(1)); } break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -187,6 +202,9 @@ void SsaRewritePass(IR::Function& function) { inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 8a5adf5a2..32b56eb57 100644 --- 
a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Function& function) { for (const auto& block : function.blocks) { for (const IR::Inst& inst : *block) { + if (inst.Opcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 9887e066d..3ca1677c4 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include @@ -36,34 +37,46 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique(path.string().c_str())); }); - for (int i = 0; i < 300; ++i) { + auto block_pool{std::make_unique>()}; + auto t0 = std::chrono::high_resolution_clock::now(); + int N = 1; + int n = 0; + for (int i = 0; i < N; ++i) { for (auto& env : map) { + ++n; // fmt::print(stdout, "Decoding {}\n", path.string()); + const Location start_address{0}; - auto cfg{std::make_unique(*env, start_address)}; + block_pool->ReleaseContents(); + Flow::CFG cfg{*env, *block_pool, start_address}; // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; } } + auto t = std::chrono::high_resolution_clock::now(); + fmt::print(stdout, "{} ms", + std::chrono::duration_cast(t - t0).count() / double(N)); } int main() { // RunDatabase(); + auto flow_block_pool{std::make_unique>()}; auto inst_pool{std::make_unique>()}; auto block_pool{std::make_unique>()}; - // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + // FileEnvironment env{"D:\\Shaders\\shader.bin"}; for (int i = 0; i < 1; ++i) { block_pool->ReleaseContents(); inst_pool->ReleaseContents(); - auto cfg{std::make_unique(env, 0)}; - // fmt::print(stdout, "{}\n", cfg->Dot()); - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; - // fmt::print(stdout, "{}\n", IR::DumpProgram(program)); + flow_block_pool->ReleaseContents(); + Flow::CFG cfg{env, *flow_block_pool, 0}; + fmt::print(stdout, "{}\n", cfg.Dot()); + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); Backend::SPIRV::EmitSPIRV spirv{program}; } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 000000000..1760bf4a9 --- /dev/null +++ b/src/shader_recompiler/shader_info.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
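// Reader's assumption, not stated by the patch: these usage flags presumably
// exist so a backend can declare optional features (for example the SPIR-V
// Float16/Float64 capabilities) only for shaders that actually need them.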
+ +#pragma once + +#include <array> + +#include + +namespace Shader { + +struct Info { + struct ConstantBuffer { + + }; + + struct { + bool workgroup_id{}; + bool local_invocation_id{}; + bool fp16{}; + bool fp64{}; + } uses; + + std::array<ConstantBuffer, 18> constant_buffers; +}; + +} // namespace Shader From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 01:24:32 -0300 Subject: [PATCH 013/770] shader: Misc fixes --- .../backend/spirv/emit_spirv.cpp | 6 + .../backend/spirv/emit_spirv.h | 5 + .../frontend/ir/basic_block.cpp | 4 +- .../frontend/ir/ir_emitter.cpp | 2 + .../frontend/ir/microinstruction.cpp | 16 +-- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/integer_set_predicate.cpp | 4 +- .../ir_opt/constant_propagation_pass.cpp | 27 ++--- .../ir_opt/ssa_rewrite_pass.cpp | 113 +++++++++--------- src/shader_recompiler/main.cpp | 12 +- 10 files changed, 104 insertions(+), 89 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5022b5159..e29e448c7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -25,6 +25,9 @@ EmitContext::EmitContext(IR::Program& program) { f16.Define(*this, TypeFloat(16), "f16"); f64.Define(*this, TypeFloat(64), "f64"); + true_value = ConstantTrue(u1); + false_value = ConstantFalse(u1); + for (const IR::Function& function : program.functions) { for (IR::Block* const block : function.blocks) { block_label_map.emplace_back(block, OpLabel()); @@ -58,6 +61,7 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { std::fclose(file); std::system("spirv-dis shader.spv"); std::system("spirv-val shader.spv"); + std::system("spirv-cross shader.spv"); } template @@ -109,6 +113,8 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) { switch (type) { case IR::Type::U1: return ctx.u1; + case IR::Type::U32: + return ctx.u32[1]; default: throw NotImplementedException("Phi node type {}", type); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 9aa83b5de..46ec7a1bb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -79,6 +79,8 @@ public: return def_map.Consume(value.Inst()); } switch (value.Type()) { + case IR::Type::U1: + return value.U1() ? true_value : false_value; case IR::Type::U32: return Constant(u32[1], value.U32()); case IR::Type::F32: @@ -108,6 +110,9 @@ public: VectorTypes f16; VectorTypes f64; + Id true_value{}; + Id false_value{}; + Id workgroup_id{}; Id local_invocation_id{}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index b5616f394..c97626712 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -113,7 +113,7 @@ static std::string ArgToIndex(const std::map& block_to_ind if (arg.IsLabel()) { return BlockToIndex(block_to_index, arg.Label()); } - if (!arg.IsImmediate()) { + if (!arg.IsImmediate() || arg.IsIdentity()) { return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { @@ -166,7 +166,7 @@ std::string DumpBlock(const Block& block, const std::map& const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ?
", " : " "; if (op == Opcode::Phi) { - ret += fmt::format("[ {}, {} ]", arg_index, + ret += fmt::format("[ {}, {} ]", arg_str, BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); } else { ret += arg_str; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 30932043f..f42489d41 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -46,10 +46,12 @@ F64 IREmitter::Imm64(f64 value) const { void IREmitter::Branch(Block* label) { label->AddImmediatePredecessor(block); + block->SetBranch(label); Inst(Opcode::Branch, label); } void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + block->SetBranches(IR::Condition{true}, true_label, false_label); true_label->AddImmediatePredecessor(block); false_label->AddImmediatePredecessor(block); Inst(Opcode::BranchConditional, condition, true_label, false_label); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b4ae371bd..9279b9692 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -143,19 +143,21 @@ Value Inst::Arg(size_t index) const { } void Inst::SetArg(size_t index, Value value) { - if (op == Opcode::Phi) { - throw LogicError("Setting argument on a phi instruction"); - } - if (index >= NumArgsOf(op)) { + if (index >= NumArgs()) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } - if (!args[index].IsImmediate()) { - UndoUse(args[index]); + const IR::Value arg{Arg(index)}; + if (!arg.IsImmediate()) { + UndoUse(arg); } if (!value.IsImmediate()) { Use(value); } - args[index] = value; + if (op == Opcode::Phi) { + phi_args[index].second = value; + } else { + args[index] = value; + } } Block* Inst::PhiBlock(size_t index) const { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 60f79b160..623e78ff8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -76,8 +76,8 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } // Anonymous namespace -void TranslatorVisitor::IADD_reg(u64) { - throw NotImplementedException("IADD (reg)"); +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::IADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 76c6b5291..1bc9ef363 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -92,8 +92,8 @@ void TranslatorVisitor::ISETP_cbuf(u64 insn) { ISETP(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::ISETP_imm(u64) { - throw NotImplementedException("ISETP_imm"); +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9fba6ac23..cbde65b9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ 
b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -32,6 +32,8 @@ template return value.U1(); } else if constexpr (std::is_same_v) { return value.U32(); + } else if constexpr (std::is_same_v) { + return static_cast(value.U32()); } else if constexpr (std::is_same_v) { return value.F32(); } else if constexpr (std::is_same_v) { @@ -39,17 +41,8 @@ template } } -template +template bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { - const auto arg = [](const IR::Value& value) { - if constexpr (std::is_invocable_r_v) { - return value.U1(); - } else if constexpr (std::is_invocable_r_v) { - return value.U32(); - } else if constexpr (std::is_invocable_r_v) { - return value.U64(); - } - }; const IR::Value lhs{inst.Arg(0)}; const IR::Value rhs{inst.Arg(1)}; @@ -57,14 +50,14 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { const bool is_rhs_immediate{rhs.IsImmediate()}; if (is_lhs_immediate && is_rhs_immediate) { - const auto result{imm_fn(arg(lhs), arg(rhs))}; + const auto result{imm_fn(Arg(lhs), Arg(rhs))}; inst.ReplaceUsesWith(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); } else { @@ -76,7 +69,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); } @@ -101,7 +94,7 @@ void FoldAdd(IR::Inst& inst) { if (inst.HasAssociatedPseudoOperation()) { return; } - if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { + if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -119,7 +112,7 @@ void FoldSelect(IR::Inst& inst) { } void FoldLogicalAnd(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -133,7 +126,7 @@ void FoldLogicalAnd(IR::Inst& inst) { } void FoldLogicalOr(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -226,6 +219,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); + case IR::Opcode::SLessThan: + return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 8ca996e93..7eaf719c4 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -113,6 +113,7 @@ private: IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { IR::Value val; if (const std::span 
preds{block->ImmediatePredecessors()}; preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi @@ -160,66 +161,70 @@ private: DefTable current_def; }; + +void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); + } + break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; + case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); + break; + default: + break; + } +} } // Anonymous namespace void SsaRewritePass(IR::Function& function) { Pass pass; for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { - switch (inst.Opcode()) { - case IR::Opcode::SetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetGotoVariable: - pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); - break; - case IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::GetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); - } - break; - case IR::Opcode::GetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); - } - break; - case IR::Opcode::GetGotoVariable: - 
inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); - break; - case IR::Opcode::GetZFlag: - inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); - break; - case IR::Opcode::GetSFlag: - inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); - break; - case IR::Opcode::GetCFlag: - inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); - break; - case IR::Opcode::GetOFlag: - inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); - break; - default: - break; - } + VisitInst(pass, block, inst); } } } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 3ca1677c4..92358232c 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -38,7 +38,8 @@ void RunDatabase() { map.emplace_back(std::make_unique(path.string().c_str())); }); auto block_pool{std::make_unique>()}; - auto t0 = std::chrono::high_resolution_clock::now(); + using namespace std::chrono; + auto t0 = high_resolution_clock::now(); int N = 1; int n = 0; for (int i = 0; i < N; ++i) { @@ -55,9 +56,8 @@ void RunDatabase() { // const std::string code{EmitGLASM(program)}; } } - auto t = std::chrono::high_resolution_clock::now(); - fmt::print(stdout, "{} ms", - std::chrono::duration_cast(t - t0).count() / double(N)); + auto t = high_resolution_clock::now(); + fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); } int main() { @@ -67,8 +67,8 @@ int main() { auto inst_pool{std::make_unique>()}; auto block_pool{std::make_unique>()}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - // FileEnvironment env{"D:\\Shaders\\shader.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\shader.bin"}; for (int i = 0; i < 1; ++i) { block_pool->ReleaseContents(); inst_pool->ReleaseContents(); From cbfb7d182a4e90e4e263696d1fca35e47d3eabb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 20:15:42 -0300 Subject: [PATCH 014/770] shader: Support SSA loops on IR --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_spirv.h | 12 ++-- .../backend/spirv/emit_spirv_integer.cpp | 8 +-- .../backend/spirv/emit_spirv_undefined.cpp | 10 +-- src/shader_recompiler/frontend/ir/function.h | 1 + .../frontend/ir/post_order.cpp | 48 ++++++++++++++ .../frontend/ir/post_order.h | 13 ++++ .../frontend/maxwell/program.cpp | 12 ++-- .../ir_opt/dead_code_elimination_pass.cpp | 2 +- src/shader_recompiler/ir_opt/passes.h | 8 ++- .../ir_opt/ssa_rewrite_pass.cpp | 62 +++++++++++++++---- src/shader_recompiler/main.cpp | 18 +++--- 12 files changed, 150 insertions(+), 46 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/post_order.cpp create mode 100644 src/shader_recompiler/frontend/ir/post_order.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 27fc79e21..e1f4276a1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,8 @@ add_executable(shader_recompiler frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc + frontend/ir/post_order.cpp + frontend/ir/post_order.h frontend/ir/pred.h frontend/ir/program.cpp frontend/ir/program.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 46ec7a1bb..6b09757d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ 
-159,10 +159,10 @@ private: Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitUndefU1(EmitContext& ctx); - void EmitUndefU8(EmitContext& ctx); - void EmitUndefU16(EmitContext& ctx); - void EmitUndefU32(EmitContext& ctx); - void EmitUndefU64(EmitContext& ctx); + Id EmitUndefU8(EmitContext& ctx); + Id EmitUndefU16(EmitContext& ctx); + Id EmitUndefU32(EmitContext& ctx); + Id EmitUndefU64(EmitContext& ctx); void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); @@ -297,12 +297,12 @@ private: void EmitBitFieldInsert(EmitContext& ctx); void EmitBitFieldSExtract(EmitContext& ctx); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - void EmitSLessThan(EmitContext& ctx); + Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); void EmitULessThan(EmitContext& ctx); void EmitIEqual(EmitContext& ctx); void EmitSLessThanEqual(EmitContext& ctx); void EmitULessThanEqual(EmitContext& ctx); - void EmitSGreaterThan(EmitContext& ctx); + Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); void EmitUGreaterThan(EmitContext& ctx); void EmitINotEqual(EmitContext& ctx); void EmitSGreaterThanEqual(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 3ef4f3d78..e811a63ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -73,8 +73,8 @@ Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id coun return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); } -void EmitSPIRV::EmitSLessThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.u1, lhs, rhs); } void EmitSPIRV::EmitULessThan(EmitContext&) { @@ -93,8 +93,8 @@ void EmitSPIRV::EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.u1, lhs, rhs); } void EmitSPIRV::EmitUGreaterThan(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 859b60a95..a6f542360 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -10,19 +10,19 @@ Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.u1); } -void EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitSPIRV::EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitSPIRV::EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndefU32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { + return ctx.OpUndef(ctx.u32[1]); } -void EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitSPIRV::EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index fd7d56419..d1f061146 100644 --- 
a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -12,6 +12,7 @@ namespace Shader::IR { struct Function { BlockList blocks; + BlockList post_order_blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..a48b8dec5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" + +namespace Shader::IR { + +BlockList PostOrder(const BlockList& blocks) { + boost::container::small_vector block_stack; + boost::container::flat_set visited; + + BlockList post_order_blocks; + post_order_blocks.reserve(blocks.size()); + + Block* const first_block{blocks.front()}; + visited.insert(first_block); + block_stack.push_back(first_block); + + const auto visit_branch = [&](Block* block, Block* branch) { + if (!branch) { + return false; + } + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on msvc + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }; + while (!block_stack.empty()) { + Block* const block{block_stack.back()}; + block_stack.pop_back(); + + if (!visit_branch(block, block->TrueBranch()) && + !visit_branch(block, block->FalseBranch())) { + post_order_blocks.push_back(block); + } + } + return post_order_blocks; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..30137ff57 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
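// Rough intuition for PostOrder above: a block is pushed back onto the stack
// until both of its branches have been visited, so it is emitted only after
// its successors; iterating the result in reverse therefore visits
// predecessors before successors (back edges aside), the order the SSA
// rewrite pass below depends on.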
+ +#pragma once + +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +BlockList PostOrder(const BlockList& blocks); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 9fa912ed8..dab6d68c0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -56,11 +57,14 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { template -void Invoke(Func&& func, IR::Function& function) { - for (const auto& block : function.blocks) { +void PostOrderInvoke(Func&& func, IR::Function& function) { + for (const auto& block : function.post_order_blocks) { func(*block); } } @@ -20,7 +22,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Block& block); void IdentityRemovalPass(IR::Function& function); -void SsaRewritePass(IR::Function& function); +void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7eaf719c4..13f9c914a 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,13 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // +#include +#include +#include +#include + #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" @@ -26,9 +32,9 @@ namespace Shader::Optimization { namespace { -using ValueMap = boost::container::flat_map>; - -struct FlagTag {}; +struct FlagTag { + auto operator<=>(const FlagTag&) const noexcept = default; +}; struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; @@ -38,9 +44,15 @@ struct GotoVariable : FlagTag { GotoVariable() = default; explicit GotoVariable(u32 index_) : index{index_} {} + auto operator<=>(const GotoVariable&) const noexcept = default; + u32 index; }; +using Variant = std::variant; +using ValueMap = boost::container::flat_map>; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -102,19 +114,35 @@ public: } IR::Value ReadVariable(auto variable, IR::Block* block) { - auto& def{current_def[variable]}; + const ValueMap& def{current_def[variable]}; if (const auto it{def.find(block)}; it != def.end()) { return it->second; } return ReadVariableRecursive(variable, block); } + void SealBlock(IR::Block* block) { + const auto it{incomplete_phis.find(block)}; + if (it != incomplete_phis.end()) { + for (auto& [variant, phi] : it->second) { + std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); + } + } + sealed_blocks.insert(block); + } + private: IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { 
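// The three cases below follow Braun et al.'s SSA construction, linked at the
// top of this file: an unsealed block (not all predecessors known yet) records
// an incomplete phi for SealBlock to complete later; a block with exactly one
// predecessor needs no phi at all; any other block first receives an
// operandless phi to break read cycles and has its operands filled afterwards.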
IR::Value val; - if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + if (!sealed_blocks.contains(block)) { + // Incomplete CFG + IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + incomplete_phis[block].insert_or_assign(variable, phi); + val = IR::Value{&*phi}; + } else if (const std::span imm_preds{block->ImmediatePredecessors()}; + imm_preds.size() == 1) { // Optimize the common case of one predecessor: no phi needed - val = ReadVariable(variable, preds.front()); + val = ReadVariable(variable, imm_preds.front()); } else { // Break potential cycles with operandless phi IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; @@ -127,8 +155,8 @@ private: } IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { - for (IR::Block* const pred : block->ImmediatePredecessors()) { - phi.AddPhiOperand(pred, ReadVariable(variable, pred)); + for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } @@ -159,6 +187,9 @@ private: return same; } + boost::container::flat_set sealed_blocks; + boost::container::flat_map> + incomplete_phis; DefTable current_def; }; @@ -218,14 +249,19 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { break; } } + +void VisitBlock(Pass& pass, IR::Block* block) { + for (IR::Inst& inst : block->Instructions()) { + VisitInst(pass, block, inst); + } + pass.SealBlock(block); +} } // Anonymous namespace -void SsaRewritePass(IR::Function& function) { +void SsaRewritePass(std::span post_order_blocks) { Pass pass; - for (IR::Block* const block : function.blocks) { - for (IR::Inst& inst : block->Instructions()) { - VisitInst(pass, block, inst); - } + for (IR::Block* const block : post_order_blocks | std::views::reverse) { + VisitBlock(pass, block); } } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 92358232c..29f65966c 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -69,14 +69,12 @@ int main() { // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; FileEnvironment env{"D:\\Shaders\\shader.bin"}; - for (int i = 0; i < 1; ++i) { - block_pool->ReleaseContents(); - inst_pool->ReleaseContents(); - flow_block_pool->ReleaseContents(); - Flow::CFG cfg{env, *flow_block_pool, 0}; - fmt::print(stdout, "{}\n", cfg.Dot()); - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; - fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; - } + block_pool->ReleaseContents(); + inst_pool->ReleaseContents(); + flow_block_pool->ReleaseContents(); + Flow::CFG cfg{env, *flow_block_pool, 0}; + fmt::print(stdout, "{}\n", cfg.Dot()); + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); + // Backend::SPIRV::EmitSPIRV spirv{program}; } From 1b0cf2309c760c1cb97a230a1572f8e87f84444a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 22:46:40 -0300 Subject: [PATCH 015/770] shader: Add support for forward declarations --- externals/sirit | 2 +- .../backend/spirv/emit_spirv.cpp | 61 +++++++++++++++---- .../backend/spirv/emit_spirv.h | 40 +----------- .../frontend/ir/basic_block.cpp | 2 +- .../frontend/ir/basic_block.h | 2 +- .../frontend/ir/ir_emitter.h | 4 +- .../frontend/ir/microinstruction.cpp | 2 +- 
.../frontend/ir/microinstruction.h | 20 +++++- src/shader_recompiler/frontend/ir/modifiers.h | 10 +-- .../global_memory_to_storage_buffer_pass.cpp | 4 +- src/shader_recompiler/main.cpp | 2 +- 11 files changed, 80 insertions(+), 69 deletions(-) diff --git a/externals/sirit b/externals/sirit index c374bfd9f..f819ade0e 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e +Subproject commit f819ade0efe925a782090dea9e1bf300fedffb39 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index e29e448c7..0895414b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -64,31 +64,49 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { std::system("spirv-cross shader.spv"); } +template +static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { + const Id forward_id{inst->Definition()}; + const bool has_forward_id{Sirit::ValidId(forward_id)}; + Id current_id{}; + if (has_forward_id) { + current_id = ctx.ExchangeCurrentId(forward_id); + } + const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + if (has_forward_id) { + ctx.ExchangeCurrentId(current_id); + } else { + inst->SetDefinition(new_id); + } +} + template static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { using M = decltype(method); using std::is_invocable_r_v; if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx)); + SetDefinition(emit, ctx, inst); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)))); + SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0))); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2)))); + SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2))); + } else if constexpr (is_invocable_r_v) { + SetDefinition(emit, ctx, inst, inst); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2)))); + SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2))); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), inst->Arg(1).U32())); + SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0))); + SetDefinition(emit, ctx, inst, inst->Arg(0)); } else if constexpr (is_invocable_r_v) { - ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0), inst->Arg(1))); + SetDefinition(emit, ctx, inst, inst->Arg(0), inst->Arg(1)); } else if constexpr (is_invocable_r_v) { (emit.*method)(ctx, inst); } else if constexpr (is_invocable_r_v) { @@ -122,11 +140,28 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) { Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* 
inst) { const size_t num_args{inst->NumArgs()}; - boost::container::small_vector operands; + boost::container::small_vector operands; operands.reserve(num_args * 2); for (size_t index = 0; index < num_args; ++index) { + // Phi nodes can have forward declarations; if an argument is not yet defined, provide a + // forward declaration of it. Invoke will take care of giving it the right definition when + // it's actually defined. + const IR::Value arg{inst->Arg(index)}; + Id def{}; + if (arg.IsImmediate()) { + // Let the context handle immediate definitions, as it already knows how + def = ctx.Def(arg); + } else { + IR::Inst* const arg_inst{arg.Inst()}; + def = arg_inst->Definition(); + if (!Sirit::ValidId(def)) { + // If it hasn't been defined, get a forward declaration + def = ctx.ForwardDeclarationId(); + arg_inst->SetDefinition(def); + } + } IR::Block* const phi_block{inst->PhiBlock(index)}; - operands.push_back(ctx.Def(inst->Arg(index))); + operands.push_back(def); operands.push_back(ctx.BlockLabel(phi_block)); } const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6b09757d1..7d76377b5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -6,8 +6,6 @@ #include -#include - #include "common/common_types.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" @@ -16,37 +14,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -class DefMap { -public: - void Define(IR::Inst* inst, Id def_id) { - const InstInfo info{.use_count{inst->UseCount()}, .def_id{def_id}}; - const auto it{map.insert(map.end(), std::make_pair(inst, info))}; - if (it == map.end()) { - throw LogicError("Defining already defined instruction"); - } - } - - [[nodiscard]] Id Consume(IR::Inst* inst) { - const auto it{map.find(inst)}; - if (it == map.end()) { - throw LogicError("Consuming undefined instruction"); - } - const Id def_id{it->second.def_id}; - if (--it->second.use_count == 0) { - map.erase(it); - } - return def_id; - } - -private: - struct InstInfo { - int use_count; - Id def_id; - }; - - boost::container::flat_map map; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -76,7 +43,7 @@ public: [[nodiscard]] Id Def(const IR::Value& value) { if (!value.IsImmediate()) { - return def_map.Consume(value.Inst()); + return value.Inst()->Definition(); } switch (value.Type()) { case IR::Type::U1: @@ -90,10 +57,6 @@ } } - void Define(IR::Inst* inst, Id def_id) { - def_map.Define(inst, def_id); - } - [[nodiscard]] Id BlockLabel(IR::Block* block) const { const auto it{std::ranges::lower_bound(block_label_map, block, {}, &std::pair::first)}; @@ -117,7 +80,6 @@ Id local_invocation_id{}; private: - DefMap def_map; std::vector> block_label_map; }; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index c97626712..5ae91dd7d 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -26,7 +26,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args, u64 flags) { + std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; const auto
result_it{instructions.insert(insertion_point, *inst)}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3205705e7..778b32e43 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -42,7 +42,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args = {}, u64 flags = 0); + std::initializer_list args = {}, u32 flags = 0); /// Set the branches to jump to when all instructions have executed. void SetBranches(Condition cond, Block* branch_true, Block* branch_false); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4decb46bc..24b012a39 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -178,7 +178,7 @@ private: } template - requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v) struct Flags { + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v) struct Flags { Flags() = default; Flags(T proxy_) : proxy{proxy_} {} @@ -187,7 +187,7 @@ private: template T Inst(Opcode op, Flags flags, Args... args) { - u64 raw_flags{}; + u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 9279b9692..ee76db9ad 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -31,7 +31,7 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } -Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { std::construct_at(&phi_args); } else { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index ddf0f90a9..5b244fa0b 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -12,6 +12,7 @@ #include +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" @@ -25,7 +26,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept; + explicit Inst(Opcode op_, u32 flags_) noexcept; ~Inst(); Inst& operator=(const Inst&) = delete; @@ -86,13 +87,25 @@ public: void ReplaceUsesWith(Value replacement); template - requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v) + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; std::memcpy(&ret, &flags, sizeof(ret)); return ret; } + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} @@ -103,7 +116,8 @@ private: IR::Opcode op{}; int use_count{}; - u64 flags{}; + u32 flags{}; + u32 definition{}; union { NonTriviallyDummy dummy{}; std::array args; diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 28bb9e798..c288eede0 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -6,13 +6,13 @@ namespace Shader::IR { -enum class FmzMode { +enum class FmzMode : u8 { None, // Denorms are not flushed, NAN is propagated (nouveau) FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) }; -enum class FpRounding { +enum class FpRounding : u8 { RN, // Round to nearest even, RM, // Round towards negative infinity RP, // Round towards positive infinity @@ -21,8 +21,8 @@ enum class FpRounding { struct FpControl { bool no_contraction{false}; - FpRounding rounding : 8 = FpRounding::RN; - FmzMode fmz_mode : 8 = FmzMode::FTZ; + FpRounding rounding{FpRounding::RN}; + FmzMode fmz_mode{FmzMode::FTZ}; }; -static_assert(sizeof(FpControl) <= sizeof(u64)); +static_assert(sizeof(FpControl) <= sizeof(u32)); } // namespace Shader::IR diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 34393e1d5..08fd364bb 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -161,8 +161,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index = index.U32(), - .offset = offset.U32(), + .index{index.U32()}, + .offset{offset.U32()}, }; if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly point to them diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 29f65966c..3b110af61 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - // Backend::SPIRV::EmitSPIRV spirv{program}; + Backend::SPIRV::EmitSPIRV spirv{program}; } From 1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Feb 2021 00:07:52 -0300 Subject: [PATCH 016/770] shader: Fix tracking --- .../global_memory_to_storage_buffer_pass.cpp | 122 +++++++++++------- 1 file changed, 72 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 08fd364bb..b40c0c57b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -142,6 +142,58 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { } } +struct LowAddrInfo { + IR::U32 value; + s32 imm_offset; +}; + +/// Tries to track the first 32-bits of a global memory instruction +std::optional TrackLowAddress(IR::Inst* inst) { + // The first argument is the low level GPU 
pointer to the global memory instruction + const IR::U64 addr{inst->Arg(0)}; + if (addr.IsImmediate()) { + // Not much we can do if it's an immediate + return std::nullopt; + } + // This address is expected to either be a PackUint2x32 or an IAdd64 + IR::Inst* addr_inst{addr.InstRecursive()}; + s32 imm_offset{0}; + if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + // If it's an IAdd64, get the immediate offset it is applying and grab the address + // instruction. This expects the instruction to be canonicalized, with the address in + // the first argument and the immediate offset in the second one. + const IR::U64 imm_offset_value{addr_inst->Arg(1)}; + if (!imm_offset_value.IsImmediate()) { + return std::nullopt; + } + imm_offset = static_cast(static_cast(imm_offset_value.U64())); + const IR::U64 iadd_addr{addr_inst->Arg(0)}; + if (iadd_addr.IsImmediate()) { + return std::nullopt; + } + addr_inst = iadd_addr.Inst(); + } + // With IAdd64 handled, now PackUint2x32 is expected without exceptions + if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + return std::nullopt; + } + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + // This vector is expected to be a CompositeConstructU32x2 + IR::Inst* const vector_inst{vector.InstRecursive()}; + if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + return std::nullopt; + } + // Grab the first argument from the CompositeConstructU32x2; this is the low address. + return LowAddrInfo{ + .value{IR::U32{vector_inst->Arg(0)}}, + .imm_offset{imm_offset}, + }; +} + /// Recursively tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { if (value.IsImmediate()) { @@ -191,13 +243,26 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, }; // First try to find storage buffers in the NVN address const IR::U64 addr{inst->Arg(0)}; - std::optional storage_buffer{Track(addr, &nvn_bias)}; + if (addr.IsImmediate()) { + // Immediate addresses can't be lowered to a storage buffer + DiscardGlobalMemory(block, inst); + return; + } + // Track the low address of the instruction + const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + if (!low_addr_info) { + DiscardGlobalMemory(block, inst); + return; + } + const IR::U32 low_addr{low_addr_info->value}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias - storage_buffer = Track(addr, nullptr); + storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also failed, drop the global memory usage DiscardGlobalMemory(block, inst); + return; } } // Collect storage buffer and the instruction @@ -208,58 +273,15 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, }); } -/// Tries to track the first 32-bits of a global memory instruction -std::optional TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { - // The first argument is the low level GPU pointer to the global memory instruction - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Not much we can do if it's an immediate - return std::nullopt; - } - // This address is expected to either be a PackUint2x32 or a IAdd64 - IR::Inst* addr_inst{addr.InstRecursive()}; - s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { - // If it's an IAdd64, get the immediate offset it is applying
and grab the address - // instruction. This expects for the instruction to be canonicalized having the address on - // the first argument and the immediate offset on the second one. - const IR::U64 imm_offset_value{addr_inst->Arg(1)}; - if (!imm_offset_value.IsImmediate()) { - return std::nullopt; - } - imm_offset = static_cast(static_cast(imm_offset_value.U64())); - const IR::U64 iadd_addr{addr_inst->Arg(0)}; - if (iadd_addr.IsImmediate()) { - return std::nullopt; - } - addr_inst = iadd_addr.Inst(); - } - // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { - return std::nullopt; - } - // PackUint2x32 is expected to be generated from a vector - const IR::Value vector{addr_inst->Arg(0)}; - if (vector.IsImmediate()) { - return std::nullopt; - } - // This vector is expected to be a CompositeConstructU32x2 - IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { - return std::nullopt; - } - // Grab the first argument from the CompositeConstructU32x2, this is the low address. - // Re-apply the offset in case we found one. - const IR::U32 low_addr{vector_inst->Arg(0)}; - return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; -} - /// Returns the offset in indices (not bytes) for an equivalent storage instruction IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { IR::IREmitter ir{block, inst}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(ir, &*inst)}) { - offset = *low_addr; + if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + offset = low_addr->value; + if (low_addr->imm_offset != 0) { + offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); + } } else { offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); } From d5d468cf2cbe235ee149dbd37951389d2a7e61da Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Feb 2021 00:09:11 -0300 Subject: [PATCH 017/770] shader: Improve object pool --- .../frontend/ir/structured_control_flow.cpp | 10 +-- src/shader_recompiler/main.cpp | 22 ++--- src/shader_recompiler/object_pool.h | 84 +++++++++++-------- 3 files changed, 66 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index 2e9ce2525..d145095d1 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -269,7 +269,7 @@ bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { class GotoPass { public: - explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) + explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); @@ -554,7 +554,7 @@ private: return offset; } - ObjectPool& pool; + ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -589,7 +589,7 @@ Block* TryFindForwardBlock(const Statement& stmt) { class TranslatePass { public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, + ObjectPool& stmt_pool_, Statement& root_stmt, const std::function& func_, BlockList& block_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, block_list{block_list_} { @@ -720,7 +720,7 @@ private: return block; } 
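The ObjectPool rework in this patch replaces the intrusive linked list of fixed-size chunks with a std::vector of runtime-sized chunks, so each call site can choose a chunk size that fits its workload (VisitAST below sizes the statement pool at 64). A minimal sketch of the intended Create/ReleaseContents cycle, assuming only the pool interface visible in this patch and a hypothetical Node payload type:

    #include "shader_recompiler/object_pool.h"

    struct Node {
        explicit Node(int v) : value{v} {}
        int value;
    };

    void Example() {
        Shader::ObjectPool<Node> pool(64);  // runtime chunk size: 64 objects per chunk
        Node* const node{pool.Create(123)}; // placement-constructed in the current chunk
        // ... build and consume the data ...
        pool.ReleaseContents(); // destroys all live objects; node now dangles, but the memory
                                // is squashed into a single chunk and reused on the next run
    }

Squashing into one chunk on release means a pool that is reused per shader, as in main.cpp, settles at a single allocation that only grows when a translation needs more objects than the previous one did.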
- ObjectPool& stmt_pool; + ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; const std::function& func; @@ -731,7 +731,7 @@ private: BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, std::span unordered_blocks, const std::function& func) { - ObjectPool stmt_pool; + ObjectPool stmt_pool{64}; GotoPass goto_pass{unordered_blocks, stmt_pool}; BlockList block_list; TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 3b110af61..216345e91 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -37,7 +37,7 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique(path.string().c_str())); }); - auto block_pool{std::make_unique>()}; + ObjectPool block_pool; using namespace std::chrono; auto t0 = high_resolution_clock::now(); int N = 1; @@ -48,8 +48,8 @@ void RunDatabase() { // fmt::print(stdout, "Decoding {}\n", path.string()); const Location start_address{0}; - block_pool->ReleaseContents(); - Flow::CFG cfg{*env, *block_pool, start_address}; + block_pool.ReleaseContents(); + Flow::CFG cfg{*env, block_pool, start_address}; // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); @@ -63,18 +63,18 @@ void RunDatabase() { int main() { // RunDatabase(); - auto flow_block_pool{std::make_unique>()}; - auto inst_pool{std::make_unique>()}; - auto block_pool{std::make_unique>()}; + ObjectPool flow_block_pool; + ObjectPool inst_pool; + ObjectPool block_pool; // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; FileEnvironment env{"D:\\Shaders\\shader.bin"}; - block_pool->ReleaseContents(); - inst_pool->ReleaseContents(); - flow_block_pool->ReleaseContents(); - Flow::CFG cfg{env, *flow_block_pool, 0}; + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + flow_block_pool.ReleaseContents(); + Flow::CFG cfg{env, flow_block_pool, 0}; fmt::print(stdout, "{}\n", cfg.Dot()); - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; + IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); Backend::SPIRV::EmitSPIRV spirv{program}; } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index a573add32..f78813b5f 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -10,19 +10,11 @@ namespace Shader { -template +template requires std::is_destructible_v class ObjectPool { public: - ~ObjectPool() { - std::unique_ptr tree_owner; - Chunk* chunk{&root}; - while (chunk) { - for (size_t obj_id = chunk->free_objects; obj_id < chunk_size; ++obj_id) { - chunk->storage[obj_id].object.~T(); - } - tree_owner = std::move(chunk->next); - chunk = tree_owner.get(); - } + explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { + node = &chunks.emplace_back(new_chunk_size); } template @@ -31,17 +23,21 @@ public: } void ReleaseContents() { - Chunk* chunk{&root}; - while (chunk) { - if (chunk->free_objects == chunk_size) { - break; - } - for (; chunk->free_objects < chunk_size; ++chunk->free_objects) { - chunk->storage[chunk->free_objects].object.~T(); - } - chunk = chunk->next.get(); + if (chunks.empty()) { + return; + } + Chunk& root{chunks.front()}; + if (root.used_objects == root.num_objects) { + // Root chunk has been 
filled, squash allocations into it + const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; + chunks.clear(); + chunks.emplace_back(total_objects); + chunks.shrink_to_fit(); + } else { + root.Release(); + chunks.resize(1); + chunks.shrink_to_fit(); } - node = &root; } private: @@ -58,31 +54,51 @@ private: }; struct Chunk { - size_t free_objects = chunk_size; - std::array storage; - std::unique_ptr next; + explicit Chunk() = default; + explicit Chunk(size_t size) + : num_objects{size}, storage{std::make_unique(size)} {} + + Chunk& operator=(Chunk&& rhs) noexcept { + Release(); + used_objects = std::exchange(rhs.used_objects, 0); + num_objects = std::exchange(rhs.num_objects, 0); + storage = std::move(rhs.storage); + return *this; + } + + Chunk(Chunk&& rhs) noexcept + : used_objects{std::exchange(rhs.used_objects, 0)}, + num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} + + ~Chunk() { + Release(); + } + + void Release() { + std::destroy_n(storage.get(), used_objects); + used_objects = 0; + } + + size_t used_objects{}; + size_t num_objects{}; + std::unique_ptr storage; }; [[nodiscard]] T* Memory() { Chunk* const chunk{FreeChunk()}; - return &chunk->storage[--chunk->free_objects].object; + return &chunk->storage[chunk->used_objects++].object; } [[nodiscard]] Chunk* FreeChunk() { - if (node->free_objects > 0) { + if (node->used_objects != node->num_objects) { return node; } - if (node->next) { - node = node->next.get(); - return node; - } - node->next = std::make_unique(); - node = node->next.get(); + node = &chunks.emplace_back(new_chunk_size); return node; } - Chunk* node{&root}; - Chunk root; + Chunk* node{}; + std::vector chunks; + size_t new_chunk_size{}; }; } // namespace Shader From b5d7279d878211654b4abb165d94af763a365f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: [PATCH 018/770] spirv: Initial bindings support --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 4 + .../backend/spirv/emit_context.cpp | 160 ++++++++++++++ .../backend/spirv/emit_context.h | 67 ++++++ .../backend/spirv/emit_spirv.cpp | 203 ++++++++---------- .../backend/spirv/emit_spirv.h | 84 +------- .../spirv/emit_spirv_bitwise_conversion.cpp | 4 +- .../backend/spirv/emit_spirv_composite.cpp | 2 +- .../spirv/emit_spirv_context_get_set.cpp | 20 +- .../backend/spirv/emit_spirv_control_flow.cpp | 26 +++ .../spirv/emit_spirv_floating_point.cpp | 18 +- .../backend/spirv/emit_spirv_integer.cpp | 16 +- .../backend/spirv/emit_spirv_memory.cpp | 36 +++- .../backend/spirv/emit_spirv_undefined.cpp | 4 +- .../frontend/ir/basic_block.h | 16 ++ src/shader_recompiler/frontend/ir/program.h | 2 + .../frontend/maxwell/program.cpp | 7 +- .../ir_opt/collect_shader_info_pass.cpp | 81 +++++++ .../ir_opt/constant_propagation_pass.cpp | 76 +++++-- .../global_memory_to_storage_buffer_pass.cpp | 110 +++++----- src/shader_recompiler/ir_opt/passes.h | 4 +- src/shader_recompiler/main.cpp | 4 +- src/shader_recompiler/shader_info.h | 33 ++- 23 files changed, 679 insertions(+), 300 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_context.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_context.h create mode 100644 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp diff --git a/externals/sirit b/externals/sirit index f819ade0e..200310e8f 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit f819ade0efe925a782090dea9e1bf300fedffb39 +Subproject commit
200310e8faa756b9869dd6dfc902c255246ac74a diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index e1f4276a1..84be94a8d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,6 @@ add_executable(shader_recompiler + backend/spirv/emit_context.cpp + backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -75,6 +77,7 @@ add_executable(shader_recompiler frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -84,6 +87,7 @@ add_executable(shader_recompiler ir_opt/verification_pass.cpp main.cpp object_pool.h + shader_info.h ) target_include_directories(video_core PRIVATE sirit) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..1c985aff8 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -0,0 +1,160 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_context.h" + +namespace Shader::Backend::SPIRV { + +void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } +} + +EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { + AddCapability(spv::Capability::Shader); + DefineCommonTypes(program.info); + DefineCommonConstants(); + DefineSpecialVariables(program.info); + DefineConstantBuffers(program.info); + DefineStorageBuffers(program.info); + DefineLabels(program); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.Inst()->Definition(); + } + switch (value.Type()) { + case IR::Type::U1: + return value.U1() ? 
true_value : false_value; + case IR::Type::U32: + return Constant(U32[1], value.U32()); + case IR::Type::F32: + return Constant(F32[1], value.F32()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} + +void EmitContext::DefineCommonTypes(const Info& info) { + void_id = TypeVoid(); + + U1 = Name(TypeBool(), "u1"); + + F32.Define(*this, TypeFloat(32), "f32"); + U32.Define(*this, TypeInt(32, false), "u32"); + + if (info.uses_fp16) { + AddCapability(spv::Capability::Float16); + F16.Define(*this, TypeFloat(16), "f16"); + } + if (info.uses_fp64) { + AddCapability(spv::Capability::Float64); + F64.Define(*this, TypeFloat(64), "f64"); + } +} + +void EmitContext::DefineCommonConstants() { + true_value = ConstantTrue(U1); + false_value = ConstantFalse(U1); + u32_zero_value = Constant(U32[1], 0U); +} + +void EmitContext::DefineSpecialVariables(const Info& info) { + const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; + Decorate(id, spv::Decoration::BuiltIn, builtin); + return id; + }}; + using namespace std::placeholders; + const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + + if (info.uses_workgroup_id) { + workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); + } +} + +void EmitContext::DefineConstantBuffers(const Info& info) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; + Decorate(array_type, spv::Decoration::ArrayStride, 16U); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, "cbuf_block"); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); + + u32 binding{}; + for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; + Decorate(id, spv::Decoration::Binding, binding); + Name(id, fmt::format("c{}", desc.index)); + std::fill_n(cbufs.data() + desc.index, desc.count, id); + binding += desc.count; + } +} + +void EmitContext::DefineStorageBuffers(const Info& info) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + AddExtension("SPV_KHR_storage_buffer_storage_class"); + + const Id array_type{TypeRuntimeArray(U32[1])}; + Decorate(array_type, spv::Decoration::ArrayStride, 4U); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, "ssbo_block"); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); + + u32 binding{}; + for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; + Decorate(id, spv::Decoration::Binding, binding); + Name(id, fmt::format("ssbo{}", binding)); + std::fill_n(ssbos.data() + binding, 
desc.count, id); + binding += desc.count; + } +} + +void EmitContext::DefineLabels(IR::Program& program) { + for (const IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + block->SetDefinition(OpLabel()); + } + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..c4b84759d --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array defs{}; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(IR::Program& program); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value); + + Id void_id{}; + Id U1{}; + VectorTypes F32; + VectorTypes U32; + VectorTypes F16; + VectorTypes F64; + + Id true_value{}; + Id false_value{}; + Id u32_zero_value{}; + + Id uniform_u32{}; + Id storage_u32{}; + + std::array cbufs{}; + std::array ssbos{}; + + Id workgroup_id{}; + Id local_invocation_id{}; + +private: + void DefineCommonTypes(const Info& info); + void DefineCommonConstants(); + void DefineSpecialVariables(const Info& info); + void DefineConstantBuffers(const Info& info); + void DefineStorageBuffers(const Info& info); + void DefineLabels(IR::Program& program); +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0895414b4..c79c09774 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -12,60 +12,22 @@ #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { +namespace { +template +struct FuncTraits : FuncTraits {}; -EmitContext::EmitContext(IR::Program& program) { - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::Float16); - AddCapability(spv::Capability::Float64); - void_id = TypeVoid(); +template +struct FuncTraits { + using ReturnType = ReturnType_; - u1 = Name(TypeBool(), "u1"); - f32.Define(*this, TypeFloat(32), "f32"); - u32.Define(*this, TypeInt(32, false), "u32"); - f16.Define(*this, TypeFloat(16), "f16"); - f64.Define(*this, TypeFloat(64), "f64"); + static constexpr size_t NUM_ARGS = sizeof...(Args); - true_value = ConstantTrue(u1); - false_value = ConstantFalse(u1); - - for (const IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - block_label_map.emplace_back(block, OpLabel()); - } - } - std::ranges::sort(block_label_map, {}, &std::pair::first); -} - -EmitContext::~EmitContext() = default; - -EmitSPIRV::EmitSPIRV(IR::Program& program) { - EmitContext ctx{program}; - const Id void_function{ctx.TypeFunction(ctx.void_id)}; - // FIXME: Forward declare functions (needs sirit support) - Id func{}; - for (IR::Function& function : program.functions) { - func = ctx.OpFunction(ctx.void_id, 
spv::FunctionControlMask::MaskNone, void_function); - for (IR::Block* const block : function.blocks) { - ctx.AddLabel(ctx.BlockLabel(block)); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } - } - ctx.OpFunctionEnd(); - } - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main"); - - std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis shader.spv"); - std::system("spirv-val shader.spv"); - std::system("spirv-cross shader.spv"); -} + template + using ArgType = std::tuple_element_t>; +}; template -static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; @@ -80,42 +42,90 @@ static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Arg } } -template -static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using M = decltype(method); - using std::is_invocable_r_v; - if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst->Arg(0)); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst->Arg(0), inst->Arg(1)); - } else if constexpr (is_invocable_r_v) { - (emit.*method)(ctx, inst); - } else if constexpr (is_invocable_r_v) { - (emit.*method)(ctx); - } else { - static_assert(false, "Bad format"); +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Label(); } } +template +void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition(emit, ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition(emit, ctx, inst, + Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr 
(Traits::NUM_ARGS == 1) { + Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(emit, ctx, inst, Indices{}); + } +} +} // Anonymous namespace + +EmitSPIRV::EmitSPIRV(IR::Program& program) { + EmitContext ctx{program}; + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + // FIXME: Forward declare functions (needs sirit support) + Id func{}; + for (IR::Function& function : program.functions) { + func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function); + for (IR::Block* const block : function.blocks) { + ctx.AddLabel(block->Definition()); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); + } + } + ctx.OpFunctionEnd(); + } + boost::container::small_vector interfaces; + if (program.info.uses_workgroup_id) { + interfaces.push_back(ctx.workgroup_id); + } + if (program.info.uses_local_invocation_id) { + interfaces.push_back(ctx.local_invocation_id); + } + + const std::span interfaces_span(interfaces.data(), interfaces.size()); + ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + + std::vector result{ctx.Assemble()}; + std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; + std::fwrite(result.data(), sizeof(u32), result.size(), file); + std::fclose(file); + std::system("spirv-dis D:\\shader.spv") == 0 && + std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && + std::system("spirv-cross -V D:\\shader.spv") == 0; +} + void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { switch (inst->Opcode()) { #define OPCODE(name, result_type, ...) 
\ @@ -130,9 +140,9 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { static Id TypeId(const EmitContext& ctx, IR::Type type) { switch (type) { case IR::Type::U1: - return ctx.u1; + return ctx.U1; case IR::Type::U32: - return ctx.u32[1]; + return ctx.U32[1]; default: throw NotImplementedException("Phi node type {}", type); } @@ -162,7 +172,7 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { } IR::Block* const phi_block{inst->PhiBlock(index)}; operands.push_back(def); - operands.push_back(ctx.BlockLabel(phi_block)); + operands.push_back(phi_block->Definition()); } const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); @@ -174,29 +184,6 @@ void EmitSPIRV::EmitIdentity(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -// FIXME: Move to its own file -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); -} - -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), - ctx.BlockLabel(inst->Arg(2).Label())); -} - -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) { - ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()), - spv::LoopControlMask::MaskNone); -} - -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) { - ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone); -} - -void EmitSPIRV::EmitReturn(EmitContext& ctx) { - ctx.OpReturn(); -} - void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7d76377b5..a5d0e1ec0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -7,82 +7,12 @@ #include #include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_context.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -using Sirit::Id; - -class VectorTypes { -public: - void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { - defs[0] = sirit_ctx.Name(base_type, name); - - std::array def_name; - for (int i = 1; i < 4; ++i) { - const std::string_view def_name_view( - def_name.data(), - fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); - } - } - - [[nodiscard]] Id operator[](size_t size) const noexcept { - return defs[size - 1]; - } - -private: - std::array defs; -}; - -class EmitContext final : public Sirit::Module { -public: - explicit EmitContext(IR::Program& program); - ~EmitContext(); - - [[nodiscard]] Id Def(const IR::Value& value) { - if (!value.IsImmediate()) { - return value.Inst()->Definition(); - } - switch (value.Type()) { - case IR::Type::U1: - return value.U1() ? 
true_value : false_value; - case IR::Type::U32: - return Constant(u32[1], value.U32()); - case IR::Type::F32: - return Constant(f32[1], value.F32()); - default: - throw NotImplementedException("Immediate type {}", value.Type()); - } - } - - [[nodiscard]] Id BlockLabel(IR::Block* block) const { - const auto it{std::ranges::lower_bound(block_label_map, block, {}, - &std::pair::first)}; - if (it == block_label_map.end()) { - throw LogicError("Undefined block"); - } - return it->second; - } - - Id void_id{}; - Id u1{}; - VectorTypes f32; - VectorTypes u32; - VectorTypes f16; - VectorTypes f64; - - Id true_value{}; - Id false_value{}; - - Id workgroup_id{}; - Id local_invocation_id{}; - -private: - std::vector> block_label_map; -}; - class EmitSPIRV { public: explicit EmitSPIRV(IR::Program& program); @@ -94,10 +24,11 @@ private: Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); - void EmitBranch(EmitContext& ctx, IR::Inst* inst); - void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); - void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst); - void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst); + void EmitBranch(EmitContext& ctx, IR::Block* label); + void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); + void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); + void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); void EmitReturn(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); @@ -150,7 +81,8 @@ private: void EmitWriteStorageS8(EmitContext& ctx); void EmitWriteStorageU16(EmitContext& ctx); void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx); + void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage64(EmitContext& ctx); void EmitWriteStorage128(EmitContext& ctx); void EmitCompositeConstructU32x2(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 447df5b8c..af82df99c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -11,7 +11,7 @@ void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { } Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.u32[1], value); + return ctx.OpBitcast(ctx.U32[1], value); } void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { @@ -23,7 +23,7 @@ void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { } Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.f32[1], value); + return ctx.OpBitcast(ctx.F32[1], value); } void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index b190cf876..a7374c89d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -23,7 +23,7 @@ void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { } Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { - return ctx.OpCompositeExtract(ctx.u32[1], vector, index); + return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } void 
EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1eab739ed..f4c9970eb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -37,7 +37,10 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: if (!offset.IsImmediate()) { throw NotImplementedException("Variable constant buffer offset"); } - return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_cbuf"); + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / 4)}; + const Id cbuf{ctx.cbufs[binding.U32()]}; + const Id access_chain{ctx.OpAccessChain(ctx.uniform_u32, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(ctx.U32[1], access_chain); } void EmitSPIRV::EmitGetAttribute(EmitContext&) { @@ -89,22 +92,11 @@ void EmitSPIRV::EmitSetOFlag(EmitContext&) { } Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { - if (ctx.workgroup_id.value == 0) { - ctx.workgroup_id = ctx.AddGlobalVariable( - ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); - ctx.Decorate(ctx.workgroup_id, spv::Decoration::BuiltIn, spv::BuiltIn::WorkgroupId); - } - return ctx.OpLoad(ctx.u32[3], ctx.workgroup_id); + return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { - if (ctx.local_invocation_id.value == 0) { - ctx.local_invocation_id = ctx.AddGlobalVariable( - ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); - ctx.Decorate(ctx.local_invocation_id, spv::Decoration::BuiltIn, - spv::BuiltIn::LocalInvocationId); - } - return ctx.OpLoad(ctx.u32[3], ctx.local_invocation_id); + return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 66ce6c8c5..549c1907a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,3 +3,29 @@ // Refer to the license.txt file included. 
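The control-flow emitters that follow read branch targets straight out of each block: every IR::Block carries the SPIR-V label assigned to it before any code is emitted, so a branch can reference a block that has not been visited yet. A short sketch of the two-phase pattern, assuming a Sirit-style module (OpLabel, AddLabel and OpBranch are the Sirit calls used by this patch; the helper names are illustrative):

    #include <span>

    #include <sirit/sirit.h>

    #include "shader_recompiler/frontend/ir/basic_block.h"

    // Phase 1: reserve a label Id for every block up front; the label is only
    // inserted into the function body later, when AddLabel runs for that block.
    void DeclareLabels(Sirit::Module& module, std::span<Shader::IR::Block* const> blocks) {
        for (Shader::IR::Block* const block : blocks) {
            block->SetDefinition(module.OpLabel());
        }
    }

    // Phase 2: any block can now branch to any other, including ones emitted later.
    void EmitJump(Sirit::Module& module, Shader::IR::Block* target) {
        module.OpBranch(target->Definition<Sirit::Id>());
    }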
#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { + ctx.OpBranch(label->Definition()); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label) { + ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); +} + +void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { + ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), + spv::LoopControlMask::MaskNone); +} + +void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { + ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); +} + +void EmitSPIRV::EmitReturn(EmitContext& ctx) { + ctx.OpReturn(); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9c39537e2..c9bc121f8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -46,27 +46,27 @@ void EmitSPIRV::EmitFPAbs64(EmitContext&) { } Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f16[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f32[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f64[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f16[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f32[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f64[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } void EmitSPIRV::EmitFPMax32(EmitContext&) { @@ -86,15 +86,15 @@ void EmitSPIRV::EmitFPMin64(EmitContext&) { } Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f16[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f32[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f64[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } void EmitSPIRV::EmitFPNeg16(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index e811a63ab..32af94a73 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -10,7 +10,7 @@ Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } - return ctx.OpIAdd(ctx.u32[1], a, b); + return ctx.OpIAdd(ctx.U32[1], a, b); } void EmitSPIRV::EmitIAdd64(EmitContext&) { @@ -18,7 +18,7 @@ void EmitSPIRV::EmitIAdd64(EmitContext&) { } Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { - return ctx.OpISub(ctx.u32[1], a, b); + return ctx.OpISub(ctx.U32[1], a, b); } void EmitSPIRV::EmitISub64(EmitContext&) { @@ -26,7 +26,7 @@ void EmitSPIRV::EmitISub64(EmitContext&) { } Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { - return ctx.OpIMul(ctx.u32[1], a, b); + return ctx.OpIMul(ctx.U32[1], a, b); } void EmitSPIRV::EmitINeg32(EmitContext&) { @@ -38,7 +38,7 @@ void EmitSPIRV::EmitIAbs32(EmitContext&) { } Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { - return ctx.OpShiftLeftLogical(ctx.u32[1], base, shift); + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { @@ -70,11 +70,11 @@ void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { } Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { - return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); + return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpSLessThan(ctx.u1, lhs, rhs); + return ctx.OpSLessThan(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitULessThan(EmitContext&) { @@ -94,7 +94,7 @@ void EmitSPIRV::EmitULessThanEqual(EmitContext&) { } Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpSGreaterThan(ctx.u1, lhs, rhs); + return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitUGreaterThan(EmitContext&) { @@ -110,7 +110,7 @@ void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { } Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpUGreaterThanEqual(ctx.u1, lhs, rhs); + return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitLogicalOr(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 21a0d72fa..5769a3c95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -2,10 +2,26 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
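StorageIndex below converts a byte offset into an element index for the typed SSBO array, using countr_zero(element_size) as the shift amount, which is only valid because every element size it is called with is a power of two. A standalone check of that equivalence (illustrative, not part of the patch):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t byte_offset = 0x48;
        const std::size_t element_size = sizeof(std::uint32_t); // 4 bytes, a power of two
        const int shift = std::countr_zero(element_size);       // log2(4) == 2
        // For power-of-two sizes, shifting right equals dividing by the size.
        assert((byte_offset >> shift) == byte_offset / element_size);
        assert((byte_offset >> shift) == 0x12); // element 18 of the u32 array
    }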
+#include + #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast(offset.U32() / element_size)}; + return ctx.Constant(ctx.U32[1], imm_offset); + } + const u32 shift{static_cast(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -79,11 +95,14 @@ void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { } Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { + const IR::Value& offset) { if (!binding.IsImmediate()) { - throw NotImplementedException("Storage buffer indexing"); + throw NotImplementedException("Dynamic storage buffer indexing"); } - return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf"); + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); } void EmitSPIRV::EmitLoadStorage64(EmitContext&) { @@ -110,8 +129,15 @@ void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx) { - ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf_store"); +void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, value); } void EmitSPIRV::EmitWriteStorage64(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index a6f542360..c1ed8f281 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { - return ctx.OpUndef(ctx.u1); + return ctx.OpUndef(ctx.U1); } Id EmitSPIRV::EmitUndefU8(EmitContext&) { @@ -19,7 +19,7 @@ Id EmitSPIRV::EmitUndefU16(EmitContext&) { } Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { - return ctx.OpUndef(ctx.u32[1]); + return ctx.OpUndef(ctx.U32[1]); } Id EmitSPIRV::EmitUndefU64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 778b32e43..b14a35ec5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,6 +11,7 @@ #include +#include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" @@ -68,6 +69,18 @@ public: /// Gets an immutable span to the immediate predecessors. 
[[nodiscard]] std::span ImmediatePredecessors() const noexcept; + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + [[nodiscard]] Condition BranchCondition() const noexcept { return branch_cond; } @@ -161,6 +174,9 @@ private: Block* branch_false{nullptr}; /// Block immediate predecessors std::vector imm_predecessors; + + /// Intrusively stored host definition of this block. + u32 definition{}; }; using BlockList = std::vector; diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index efaf1aa1e..98aab2dc6 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -9,11 +9,13 @@ #include #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { boost::container::small_vector functions; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dab6d68c0..8331d576c 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -53,21 +53,22 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolInstructions()) { + Visit(info, inst); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index cbde65b9b..f1ad16d60 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { return true; } +template +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + void FoldGetRegister(IR::Inst& inst) { if (inst.Arg(0).Reg() == IR::Reg::RZ) { inst.ReplaceUsesWith(IR::Value{u32{0}}); @@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) { } } +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. 
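+    // For example, (cbuf[i][o] + x) - cbuf[i][o] folds to x, and
+    // cbuf[i][o] - cbuf[i][o] folds to 0; u32 arithmetic wraps, so the
+    // cancellation is exact even if the addition overflowed.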
+ const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->Opcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->Opcode() != IR::Opcode::GetCbuf) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->Opcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; @@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -template -void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); -} - void FoldBranchConditional(IR::Inst& inst) { const IR::U1 cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: @@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); case IR::Opcode::SLessThan: - return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; case IR::Opcode::ULessThan: - return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; case IR::Opcode::BitFieldUExtract: - return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } return (base >> shift) & ((1U << count) - 1); }); + return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); default: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index b40c0c57b..bf230a850 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -28,7 +28,8 @@ struct StorageBufferAddr { /// Block iterator to a global memory instruction and the storage buffer it uses struct StorageInst { StorageBufferAddr storage_buffer; - IR::Block::iterator inst; + IR::Inst* 
inst; + IR::Block* block; }; /// Bias towards a certain range of constant buffers when looking for storage buffers @@ -41,7 +42,7 @@ struct Bias { using StorageBufferSet = boost::container::flat_set, boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; +using StorageInstVector = boost::container::small_vector; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce } /// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: case IR::Opcode::LoadGlobalU16: case IR::Opcode::LoadGlobal32: - inst->ReplaceUsesWith(zero); + inst.ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith(IR::Value{ - &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); break; case IR::Opcode::LoadGlobal128: - inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: - inst->Invalidate(); + inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); } } @@ -232,8 +231,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, - StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, + StorageInstVector& to_replace) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, .offset_begin{0x110}, .offset_end{0x610}, }; - // First try to find storage buffers in the NVN address - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Immediate addresses can't be lowered to a storage buffer - DiscardGlobalMemory(block, inst); - return; - } // Track the low address of the instruction - const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { DiscardGlobalMemory(block, inst); return; } + // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, 
&nvn_bias)}; if (!storage_buffer) { @@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{inst}, + .inst{&inst}, + .block{&block}, }); } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { - IR::IREmitter ir{block, inst}; +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + if (const std::optional low_addr{TrackLowAddress(&inst)}) { offset = low_addr->value; if (low_addr->imm_offset != 0) { offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); + offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. @@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA } /// Replace a global memory load instruction with its storage buffer equivalent -void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; - inst->ReplaceUsesWith(value); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); } /// Replace a global memory write instruction with its storage buffer equivalent -void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); - inst->Invalidate(); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); + inst.Invalidate(); } /// Replace a global memory instruction with its storage buffer equivalent -void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_ case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); } } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Block& block) { +void 
GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { - if (!IsGlobalMemory(*inst)) { - continue; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + } } - CollectStorageBuffers(block, inst, storage_buffers, to_replace); } - for (const auto [storage_buffer, inst] : to_replace) { - const auto it{storage_buffers.find(storage_buffer)}; - const IR::U32 storage_index{IR::Value{static_cast(storage_buffers.index_of(it))}}; - const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; - Replace(block, inst, storage_index, offset); + Info& info{program.info}; + u32 storage_index{}; + for (const StorageBufferAddr& storage_buffer : storage_buffers) { + info.storage_buffers_descriptors.push_back({ + .cbuf_index{storage_buffer.index}, + .cbuf_offset{storage_buffer.offset}, + .count{1}, + }); + info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); + ++storage_index; + } + for (const StorageInst& storage_inst : to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{storage_buffers.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 30eb31588..89e5811d3 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,6 +8,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { @@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) { } } +void CollectShaderInfoPass(IR::Program& program); void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); -void GlobalMemoryToStorageBufferPass(IR::Block& block); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 216345e91..1610bb34e 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -67,8 +67,8 @@ int main() { ObjectPool inst_pool; ObjectPool block_pool; - // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - FileEnvironment env{"D:\\Shaders\\shader.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + // FileEnvironment env{"D:\\Shaders\\shader.bin"}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); flow_block_pool.ReleaseContents(); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 1760bf4a9..f49a79368 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -6,23 +6,40 @@ #include +#include 
"common/common_types.h" + #include namespace Shader { struct Info { - struct ConstantBuffer { + static constexpr size_t MAX_CBUFS{18}; + static constexpr size_t MAX_SSBOS{16}; + struct ConstantBufferDescriptor { + u32 index; + u32 count; }; - struct { - bool workgroup_id{}; - bool local_invocation_id{}; - bool fp16{}; - bool fp64{}; - } uses; + struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; + }; - std::array<18 + bool uses_workgroup_id{}; + bool uses_local_invocation_id{}; + bool uses_fp16{}; + bool uses_fp64{}; + + u32 constant_buffer_mask{}; + + std::array constant_buffers{}; + boost::container::static_vector + constant_buffer_descriptors; + + std::array storage_buffers{}; + boost::container::static_vector storage_buffers_descriptors; }; } // namespace Shader From 3a59fffaa16838985f9f953f30d1af4aa0f86252 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:48:58 -0300 Subject: [PATCH 019/770] spirv: Implement EmitIdentity --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 ++-- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c79c09774..55018332e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -180,8 +180,8 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { void EmitSPIRV::EmitVoid(EmitContext&) {} -void EmitSPIRV::EmitIdentity(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { + return ctx.Def(value); } void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index a5d0e1ec0..8bde82613 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -23,7 +23,7 @@ private: // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); - void EmitIdentity(EmitContext& ctx); + Id EmitIdentity(EmitContext& ctx, const IR::Value& value); void EmitBranch(EmitContext& ctx, IR::Block* label); void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label); From 3633e433774927681b02e1de665a608730f5582c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:49:24 -0300 Subject: [PATCH 020/770] shader: Add utility to resolve identities on a value --- src/shader_recompiler/frontend/ir/value.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/value.h | 1 + 2 files changed, 8 insertions(+) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 9ea61813b..718314213 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -79,6 +79,13 @@ IR::Inst* Value::InstRecursive() const { return inst; } +IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + IR::Reg Value::Reg() const { ValidateAccess(Type::Reg); return reg; diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 2f3688c73..8aba0bbf6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -41,6 +41,7 @@ public: 
[[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; From 4b438f94cf56ac065cb3682a8e6ce1ea8df0ae7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:49:45 -0300 Subject: [PATCH 021/770] shader: Simplify ISCADD --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index f92c0bbd6..f06046d4d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -50,12 +50,7 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } // Anonymous namespace void TranslatorVisitor::ISCADD_reg(u64 insn) { - union { - u64 raw; - BitField<20, 8, IR::Reg> op_b; - } const iscadd{insn}; - - ISCADD(*this, insn, X(iscadd.op_b)); + ISCADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::ISCADD_cbuf(u64) { From 58914796c06662f4f901a4f195057ee1327cf055 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:50:23 -0300 Subject: [PATCH 022/770] shader: Add XMAD multiplication folding optimization --- .../ir_opt/constant_propagation_pass.cpp | 82 +++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1ad16d60..9eb61b54c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -9,6 +9,7 @@ #include "common/bit_cast.h" #include "common/bit_util.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/ir_opt/passes.h" @@ -99,8 +100,71 @@ void FoldGetPred(IR::Inst& inst) { } } +/// Replaces the pattern generated by two XMAD multiplications +bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { + /* + * We are looking for this pattern: + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) + * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) + * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) + * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * + * And replacing it with + * %result = IMul32 %factor_a, %factor_b + * + * This optimization has been proven safe by LLVM and MSVC. 
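+ *
+ * Safety sketch: write a = (a >> 16) * 0x10000 + (a & 0xffff). In wrapping
+ * u32 arithmetic (((a >> 16) * b) << 16) + ((a & 0xffff) * b) == a * b,
+ * which is the pattern above collapsed into the single IMul32.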
+ */ + const IR::Value lhs_arg{inst.Arg(0)}; + const IR::Value rhs_arg{inst.Arg(1)}; + if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { + return false; + } + IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; + if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + return false; + } + if (lhs_shl->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; + IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; + if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + return false; + } + if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { + return false; + } + const IR::U32 factor_b{lhs_mul->Arg(1)}; + if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { + return false; + } + IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; + IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; + if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + return false; + } + if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { + return false; + } + if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { + return false; + } + const IR::U32 factor_a{lhs_bfe->Arg(0)}; + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); + return true; +} + template -void FoldAdd(IR::Inst& inst) { +void FoldAdd(IR::Block& block, IR::Inst& inst) { if (inst.HasAssociatedPseudoOperation()) { return; } @@ -110,6 +174,12 @@ void FoldAdd(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate() && Arg(rhs) == 0) { inst.ReplaceUsesWith(inst.Arg(0)); + return; + } + if constexpr (std::is_same_v) { + if (FoldXmadMultiply(block, inst)) { + return; + } } } @@ -244,14 +314,14 @@ void FoldBranchConditional(IR::Inst& inst) { } } -void ConstantPropagation(IR::Inst& inst) { +void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: return FoldGetPred(inst); case IR::Opcode::IAdd32: - return FoldAdd(inst); + return FoldAdd(block, inst); case IR::Opcode::ISub32: return FoldISub32(inst); case IR::Opcode::BitCastF32U32: @@ -259,7 +329,7 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::BitCastU32F32: return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: - return FoldAdd(inst); + return FoldAdd(block, inst); case IR::Opcode::Select32: return FoldSelect(inst); case IR::Opcode::LogicalAnd: @@ -292,7 +362,9 @@ void ConstantPropagation(IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Block& block) { - std::ranges::for_each(block, ConstantPropagation); + for (IR::Inst& inst : block) { + ConstantPropagation(block, inst); + } } } // namespace Shader::Optimization From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: [PATCH 023/770] shader: Remove old shader management --- CMakeModules/GenerateSCMRev.cmake | 56 +- externals/Vulkan-Headers | 2 +- externals/sirit | 2 +- src/common/CMakeLists.txt | 56 +- src/video_core/CMakeLists.txt | 64 - .../engines/const_buffer_engine_interface.h | 103 - src/video_core/engines/kepler_compute.cpp | 44 +- 
src/video_core/engines/kepler_compute.h | 20 +- src/video_core/engines/maxwell_3d.cpp | 38 - src/video_core/engines/maxwell_3d.h | 20 +- src/video_core/guest_driver.cpp | 37 - src/video_core/guest_driver.h | 46 - src/video_core/rasterizer_interface.h | 16 +- .../renderer_opengl/gl_arb_decompiler.cpp | 2124 ------------ .../renderer_opengl/gl_arb_decompiler.h | 29 - .../renderer_opengl/gl_rasterizer.cpp | 314 +- .../renderer_opengl/gl_rasterizer.h | 33 +- .../renderer_opengl/gl_shader_cache.cpp | 564 +--- .../renderer_opengl/gl_shader_cache.h | 102 +- .../renderer_opengl/gl_shader_decompiler.cpp | 2986 ----------------- .../renderer_opengl/gl_shader_decompiler.h | 69 - .../renderer_opengl/gl_shader_disk_cache.cpp | 482 --- .../renderer_opengl/gl_shader_disk_cache.h | 176 - src/video_core/renderer_vulkan/blit_image.cpp | 1 - .../renderer_vulkan/vk_compute_pipeline.cpp | 136 +- .../renderer_vulkan/vk_compute_pipeline.h | 47 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 484 --- .../renderer_vulkan/vk_graphics_pipeline.h | 103 - .../renderer_vulkan/vk_pipeline_cache.cpp | 375 +-- .../renderer_vulkan/vk_pipeline_cache.h | 91 +- .../renderer_vulkan/vk_rasterizer.cpp | 361 +- .../renderer_vulkan/vk_rasterizer.h | 47 +- src/video_core/shader/ast.cpp | 752 ----- src/video_core/shader/ast.h | 398 --- src/video_core/shader/async_shaders.cpp | 234 -- src/video_core/shader/async_shaders.h | 138 - src/video_core/shader/compiler_settings.cpp | 26 - src/video_core/shader/compiler_settings.h | 26 - src/video_core/shader/control_flow.cpp | 751 ----- src/video_core/shader/control_flow.h | 117 - src/video_core/shader/decode.cpp | 368 -- src/video_core/shader/decode/arithmetic.cpp | 166 - .../shader/decode/arithmetic_half.cpp | 101 - .../decode/arithmetic_half_immediate.cpp | 54 - .../shader/decode/arithmetic_immediate.cpp | 53 - .../shader/decode/arithmetic_integer.cpp | 375 --- .../decode/arithmetic_integer_immediate.cpp | 99 - src/video_core/shader/decode/bfe.cpp | 77 - src/video_core/shader/decode/bfi.cpp | 45 - src/video_core/shader/decode/conversion.cpp | 321 -- src/video_core/shader/decode/ffma.cpp | 62 - src/video_core/shader/decode/float_set.cpp | 58 - .../shader/decode/float_set_predicate.cpp | 57 - src/video_core/shader/decode/half_set.cpp | 115 - .../shader/decode/half_set_predicate.cpp | 80 - src/video_core/shader/decode/hfma2.cpp | 73 - src/video_core/shader/decode/image.cpp | 536 --- src/video_core/shader/decode/integer_set.cpp | 49 - .../shader/decode/integer_set_predicate.cpp | 53 - src/video_core/shader/decode/memory.cpp | 493 --- src/video_core/shader/decode/other.cpp | 322 -- .../shader/decode/predicate_set_predicate.cpp | 68 - .../shader/decode/predicate_set_register.cpp | 46 - .../shader/decode/register_set_predicate.cpp | 86 - src/video_core/shader/decode/shift.cpp | 153 - src/video_core/shader/decode/texture.cpp | 935 ------ src/video_core/shader/decode/video.cpp | 169 - src/video_core/shader/decode/warp.cpp | 117 - src/video_core/shader/decode/xmad.cpp | 156 - src/video_core/shader/expr.cpp | 93 - src/video_core/shader/expr.h | 156 - src/video_core/shader/memory_util.cpp | 76 - src/video_core/shader/memory_util.h | 43 - src/video_core/shader/node.h | 701 ---- src/video_core/shader/node_helper.cpp | 115 - src/video_core/shader/node_helper.h | 71 - src/video_core/shader/registry.cpp | 181 - src/video_core/shader/registry.h | 172 - src/video_core/shader/shader_ir.cpp | 464 --- src/video_core/shader/shader_ir.h | 479 --- src/video_core/shader/track.cpp | 236 -- 
src/video_core/shader/transform_feedback.cpp | 115 - src/video_core/shader/transform_feedback.h | 23 - 83 files changed, 57 insertions(+), 19625 deletions(-) delete mode 100644 src/video_core/engines/const_buffer_engine_interface.h delete mode 100644 src/video_core/guest_driver.cpp delete mode 100644 src/video_core/guest_driver.h delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/shader/ast.cpp delete mode 100644 src/video_core/shader/ast.h delete mode 100644 src/video_core/shader/async_shaders.cpp delete mode 100644 src/video_core/shader/async_shaders.h delete mode 100644 src/video_core/shader/compiler_settings.cpp delete mode 100644 src/video_core/shader/compiler_settings.h delete mode 100644 src/video_core/shader/control_flow.cpp delete mode 100644 src/video_core/shader/control_flow.h delete mode 100644 src/video_core/shader/decode.cpp delete mode 100644 src/video_core/shader/decode/arithmetic.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp delete mode 100644 src/video_core/shader/decode/bfe.cpp delete mode 100644 src/video_core/shader/decode/bfi.cpp delete mode 100644 src/video_core/shader/decode/conversion.cpp delete mode 100644 src/video_core/shader/decode/ffma.cpp delete mode 100644 src/video_core/shader/decode/float_set.cpp delete mode 100644 src/video_core/shader/decode/float_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/half_set.cpp delete mode 100644 src/video_core/shader/decode/half_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/hfma2.cpp delete mode 100644 src/video_core/shader/decode/image.cpp delete mode 100644 src/video_core/shader/decode/integer_set.cpp delete mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/memory.cpp delete mode 100644 src/video_core/shader/decode/other.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_register.cpp delete mode 100644 src/video_core/shader/decode/register_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/shift.cpp delete mode 100644 src/video_core/shader/decode/texture.cpp delete mode 100644 src/video_core/shader/decode/video.cpp delete mode 100644 src/video_core/shader/decode/warp.cpp delete mode 100644 src/video_core/shader/decode/xmad.cpp delete mode 100644 src/video_core/shader/expr.cpp delete mode 100644 src/video_core/shader/expr.h delete mode 100644 src/video_core/shader/memory_util.cpp delete mode 100644 src/video_core/shader/memory_util.h delete mode 100644 src/video_core/shader/node.h delete mode 100644 src/video_core/shader/node_helper.cpp 
delete mode 100644 src/video_core/shader/node_helper.h delete mode 100644 src/video_core/shader/registry.cpp delete mode 100644 src/video_core/shader/registry.h delete mode 100644 src/video_core/shader/shader_ir.cpp delete mode 100644 src/video_core/shader/shader_ir.h delete mode 100644 src/video_core/shader/track.cpp delete mode 100644 src/video_core/shader/transform_feedback.cpp delete mode 100644 src/video_core/shader/transform_feedback.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 311ba1c2e..77358768e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -51,61 +51,7 @@ endif() # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) set(VIDEO_CORE "${SRC_DIR}/src/video_core") set(HASH_FILES - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # ... 
) set(COMBINED "") foreach (F IN LISTS HASH_FILES) diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 8188e3fbb..07c4a37bc 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0 +Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6 diff --git a/externals/sirit b/externals/sirit index 200310e8f..a39596358 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a +Subproject commit a39596358a3a5488c06554c0c15184a6af71e433 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e03fffd8d..c92266a17 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # 
... # and also check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..c5ce71706 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -61,8 +60,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,8 +68,6 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -85,10 +80,6 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -128,8 +119,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp @@ -142,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -159,57 +146,6 @@ add_library(video_core STATIC shader_cache.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - 
shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor 
AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define 
KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ 
-49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. - */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. 
-    // Thus, certain drivers may squish the size.
-    static constexpr u32 default_texture_handler_size = 8;
-
-    std::optional<u32> texture_handler_size = default_texture_handler_size;
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 58014c1c3..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,7 +11,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/guest_driver.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -45,7 +44,7 @@ public:
     virtual void Clear() = 0;
 
     /// Dispatches a compute shader invocation
-    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+    virtual void DispatchCompute() = 0;
 
     /// Resets the counter of a query
     virtual void ResetCounter(QueryType type) = 0;
@@ -136,18 +135,5 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                    const DiskResourceLoadCallback& callback) {}
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
-        return guest_driver_profile;
-    }
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
-        return guest_driver_profile;
-    }
-
-private:
-    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
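The -1/0 convention described in that comment drives most of the deleted decompiler, so it is
worth pinning down in plain C++. A small self-contained model: Select mirrors the documented
"cond < 0 ? a : b" semantics of CMP.S, and the static_asserts show why bitwise NOT doubles as
logical negation (the name Select itself is illustrative, not from the source):

    #include <cstdint>

    constexpr std::int32_t TRUE_VAL = -1; // all bits set
    constexpr std::int32_t FALSE_VAL = 0;

    static_assert(~TRUE_VAL == FALSE_VAL);  // NOT.U flips true to false
    static_assert(~FALSE_VAL == TRUE_VAL);  // ...and false to true

    // CMP.S dst, cond, a, b computes (cond < 0 ? a : b), so -1 selects `a`:
    constexpr std::int32_t Select(std::int32_t cond, std::int32_t a, std::int32_t b) {
        return cond < 0 ? a : b;
    }
    static_assert(Select(TRUE_VAL, 7, 9) == 7);
    static_assert(Select(FALSE_VAL, 7, 9) == 9);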
- -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view 
StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - 
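All of the arithmetic handlers declared above funnel through the Unary/Binary/Trinary
templates defined a little further down: allocate a temporary, format one GLASM instruction,
return the temporary's name so the parent expression can consume it. A free-standing mock of
the Binary expansion for ADD.F32 (EmitBinaryAddF32 and the sink parameter are illustrative,
not part of the class):

    #include <fmt/format.h>
    #include <string>

    // Mock of Binary<ADD_F32>: one fresh temporary, one instruction, and the
    // temporary's name returned for the caller to consume.
    std::string EmitBinaryAddF32(std::string& sink, const std::string& lhs,
                                 const std::string& rhs) {
        const std::string temporary = "T0.x"; // what AllocTemporary() would hand out
        sink += fmt::format("ADD.F32 {}, {}, {};\n", temporary, lhs, rhs);
        return temporary;
    }

    // EmitBinaryAddF32(source, "R0.x", "R1.x") appends "ADD.F32 T0.x, R0.x, R1.x;"
    // and yields "T0.x" as the expression's value.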
std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template - std::string AtomicImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template - std::string Atomic(Operation operation) { - std::string temporary = 
AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward(args)...); - } - - template - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(text, std::forward(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr 
std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Trinary, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary, - 
&ARBDecompiler::Unary, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - 
&ARBDecompiler::Unary, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT 
result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 
FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { - const std::string condition = 
VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("P{}.x", static_cast(expr->predicate)); - } - if (const auto expr = std::get_if(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if(&*node)) { - return expr->value ? 
"0xffffffff" : "0"; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = std::get_if(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", 
GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = std::get_if(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
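The lmem path a few lines up shifts the IR's byte address right by two because GLASM's
lmem[] temporaries are indexed in 32-bit words, not bytes. The same translation in plain
C++ (LocalMemoryWordIndex is an illustrative name):

    // Byte address -> word index, matching the emitted SHR.U by 2.
    constexpr unsigned LocalMemoryWordIndex(unsigned byte_address) {
        return byte_address >> 2; // same as byte_address / sizeof(u32)
    }
    static_assert(LocalMemoryWordIndex(0x10) == 4);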
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); 
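GlobalMemoryPointer, defined a little above, is what makes the guarded LOAD/STORE/ATOM paths
work: the SSBO is described by a 64-bit base address in c[binding].xy and a length in
c[binding].z, and the SLT.U.CC leaves the in-bounds test in the condition register so callers
can predicate on NE.x. A C++ model of that check, under the assumption that the struct and
function names are illustrative:

    #include <cstdint>
    #include <utility>

    struct Ssbo {
        std::uint64_t base;   // c[binding].xy, packed via PK64.U
        std::uint32_t length; // c[binding].z
    };

    // Returns the pointer and whether the access is in bounds (the CC bit).
    std::pair<std::uint64_t, bool> PointerWithBoundsCheck(const Ssbo& ssbo,
                                                          std::uint32_t offset) {
        return {ssbo.base + offset, offset < ssbo.length};
    }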
- } else if (const auto abuf = std::get_if(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: - if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high = operation[2]; - const auto* const imm_low = std::get_if(&*low); - const auto* const imm_high = std::get_if(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - 
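// sqrt(x) is assembled from the pair of instructions around this note: the
// RSQ.F32 above computes 1/sqrt(x) and the RCP.F32 below inverts it, giving
// sqrt(x) = 1 / (1/sqrt(x)) without a native square-root instruction.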
AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - 
AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - 
-std::string ARBDecompiler::LogicalAssign(Operation operation) {
-    const Node& dest = operation[0];
-    const Node& src = operation[1];
-
-    std::string target;
-
-    if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
-        ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
-        const Tegra::Shader::Pred index = pred->GetIndex();
-        switch (index) {
-        case Tegra::Shader::Pred::NeverExecute:
-        case Tegra::Shader::Pred::UnusedIndex:
-            // Writing to these predicates is a no-op
-            return {};
-        }
-        target = fmt::format("P{}.x", static_cast<u64>(index));
-    } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
-        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
-        target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
-    } else {
-        UNREACHABLE();
-        ResetTemporaries();
-        return {};
-    }
-
-    AddLine("MOV.U {}, {};", target, Visit(src));
-    ResetTemporaries();
-    return {};
-}
-
-std::string ARBDecompiler::LogicalPick2(Operation operation) {
-    std::string temporary = AllocTemporary();
-    const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
-    AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
-    return temporary;
-}
-
-std::string ARBDecompiler::LogicalAnd2(Operation operation) {
-    std::string temporary = AllocTemporary();
-    const std::string op = Visit(operation[0]);
-    AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
-    return temporary;
-}
-
-std::string ARBDecompiler::FloatOrdered(Operation operation) {
-    std::string temporary = AllocTemporary();
-    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
-    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
-    AddLine("MOV.S {}, -1;", temporary);
-    AddLine("MOV.S {} (NAN.x), 0;", temporary);
-    AddLine("MOV.S {} (NAN.y), 0;", temporary);
-    return temporary;
-}
-
-std::string ARBDecompiler::FloatUnordered(Operation operation) {
-    std::string temporary = AllocTemporary();
-    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
-    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
-    AddLine("MOV.S {}, 0;", temporary);
-    AddLine("MOV.S {} (NAN.x), -1;", temporary);
-    AddLine("MOV.S {} (NAN.y), -1;", temporary);
-    return temporary;
-}
-
-std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
-    std::string temporary = AllocTemporary();
-    AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
-    AddLine("MOV.S {}, 0;", temporary);
-    AddLine("IF CF.x;");
-    AddLine("MOV.S {}, -1;", temporary);
-    AddLine("ENDIF;");
-    return temporary;
-}
-
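LogicalAddCarry widens the hardware carry flag (CF.x, set by ADDC.U) into a 0 / -1 integer mask. The same predicate restated in portable C++, as an illustrative sketch rather than yuzu code:

    #include <cstdint>

    constexpr std::uint32_t AddCarryMask(std::uint32_t a, std::uint32_t b) {
        const std::uint32_t sum = a + b; // unsigned addition wraps modulo 2^32
        return sum < a ? ~0u : 0u;       // carry occurred exactly when the sum wrapped
    }

    static_assert(AddCarryMask(0xffffffffu, 1u) == 0xffffffffu); // wrapped: all-ones (-1) mask
    static_assert(AddCarryMask(1u, 2u) == 0u);                   // no carry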
fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + 
-std::string ARBDecompiler::ImageLoad(Operation operation) {
-    const auto& meta = std::get<MetaImage>(operation.GetMeta());
-    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
-    const std::size_t count = operation.GetOperandsCount();
-    const std::string_view type = ImageType(meta.image.type);
-
-    const std::string temporary = AllocVectorTemporary();
-    for (std::size_t i = 0; i < count; ++i) {
-        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
-    }
-    AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
-    AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
-    return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::ImageStore(Operation operation) {
-    const auto& meta = std::get<MetaImage>(operation.GetMeta());
-    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
-    const std::size_t num_coords = operation.GetOperandsCount();
-    const std::size_t num_values = meta.values.size();
-    const std::string_view type = ImageType(meta.image.type);
-
-    const std::string coord = AllocVectorTemporary();
-    const std::string value = AllocVectorTemporary();
-    for (std::size_t i = 0; i < num_coords; ++i) {
-        AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
-    }
-    for (std::size_t i = 0; i < num_values; ++i) {
-        AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
-    }
-    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
-    return {};
-}
-
-std::string ARBDecompiler::Branch(Operation operation) {
-    const auto target = std::get<ImmediateNode>(*operation[0]);
-    AddLine("MOV.U PC.x, {};", target.GetValue());
-    AddLine("CONT;");
-    return {};
-}
-
-std::string ARBDecompiler::BranchIndirect(Operation operation) {
-    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
-    AddLine("CONT;");
-    return {};
-}
-
-std::string ARBDecompiler::PushFlowStack(Operation operation) {
-    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
-    const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
-    const std::string_view stack_name = StackName(stack);
-    AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
-    AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
-    return {};
-}
-
-std::string ARBDecompiler::PopFlowStack(Operation operation) {
-    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
-    const std::string_view stack_name = StackName(stack);
-    AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
-    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
-    AddLine("CONT;");
-    return {};
-}
-
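PushFlowStack and PopFlowStack emulate the hardware SSY (sync) and PBK (break) stacks with an indexed array plus a TOP counter per stack class. A sketch of the equivalent structure; the depth of 16 is an illustrative assumption, not the constant the decompiler reserves:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    struct FlowStack {
        std::array<std::uint32_t, 16> targets{};
        std::size_t top = 0;

        void Push(std::uint32_t target) {
            targets[top] = target; // MOV.U STACK[STACK_TOP.x].x, target
            ++top;                 // ADD.S STACK_TOP.x, STACK_TOP.x, 1
        }

        std::uint32_t Pop() {
            --top;                 // SUB.S STACK_TOP.x, STACK_TOP.x, 1
            return targets[top];   // MOV.U PC.x, STACK[STACK_TOP.x].x
        }
    };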
-std::string ARBDecompiler::Exit(Operation) {
-    Exit();
-    return {};
-}
-
-std::string ARBDecompiler::Discard(Operation) {
-    AddLine("KIL TR;");
-    return {};
-}
-
-std::string ARBDecompiler::EmitVertex(Operation) {
-    AddLine("EMIT;");
-    return {};
-}
-
-std::string ARBDecompiler::EndPrimitive(Operation) {
-    AddLine("ENDPRIM;");
-    return {};
-}
-
-std::string ARBDecompiler::InvocationId(Operation) {
-    return "primitive.invocation";
-}
-
-std::string ARBDecompiler::YNegate(Operation) {
-    LOG_WARNING(Render_OpenGL, "(STUBBED)");
-    std::string temporary = AllocTemporary();
-    AddLine("MOV.F {}, 1;", temporary);
-    return temporary;
-}
-
-std::string ARBDecompiler::ThreadId(Operation) {
-    return fmt::format("{}.threadid", StageInputName(stage));
-}
-
-std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
-    if (!device.HasWarpIntrinsics()) {
-        LOG_ERROR(Render_OpenGL,
-                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
-        return Visit(operation[0]);
-    }
-    const std::string temporary = AllocVectorTemporary();
-    AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
-            Visit(operation[1]));
-    AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
-    return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::Barrier(Operation) {
-    AddLine("BAR;");
-    return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
-    AddLine("MEMBAR.CTA;");
-    return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
-    AddLine("MEMBAR;");
-    return {};
-}
-
-} // Anonymous namespace
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
-                                    const VideoCommon::Shader::Registry& registry,
-                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
-    return ARBDecompiler(device, ir, registry, stage, identifier).Code();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
-                                    const VideoCommon::Shader::Registry& registry,
-                                    Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
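The TextureHandle helper deleted in the next hunk splits one 32-bit guest handle into a texture (TIC) index and a sampler (TSC) index, and aliases the sampler index to the TIC id when the title indexes samplers via the header. A rough sketch of that split; the 20/12 bit widths are assumptions for illustration, since yuzu decodes the fields through Tegra::Texture::TextureHandle:

    #include <cstdint>

    struct GuestTextureHandle {
        std::uint32_t raw;

        constexpr std::uint32_t TicId() const { return raw & 0xfffffu; }       // low 20 bits (assumed)
        constexpr std::uint32_t TscId() const { return (raw >> 20) & 0xfffu; } // high 12 bits (assumed)
    };

    // In ViaHeaderIndex mode, samplers are addressed with the TIC id rather
    // than the TSC id, which is why the helper aliases `sampler` to `image`.
    constexpr std::uint32_t SamplerIndex(GuestTextureHandle handle, bool via_header_index) {
        return via_header_index ? handle.TicId() : handle.TscId();
    }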
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..3551dbdcc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,40 +54,6 @@ namespace {
 
 constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
 
-struct TextureHandle {
-    constexpr TextureHandle(u32 data, bool via_header_index) {
-        const Tegra::Texture::TextureHandle handle{data};
-        image = handle.tic_id;
-        sampler = via_header_index ? image : handle.tsc_id.Value();
-    }
-
-    u32 image;
-    u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
-                             ShaderType shader_type, size_t index = 0) {
-    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
-        if (entry.is_separated) {
-            const u32 buffer_1 = entry.buffer;
-            const u32 buffer_2 = entry.secondary_buffer;
-            const u32 offset_1 = entry.offset;
-            const u32 offset_2 = entry.secondary_offset;
-            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
-            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
-            return TextureHandle(handle_1 | handle_2, via_header_index);
-        }
-    }
-    if (entry.is_bindless) {
-        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
-        return TextureHandle(raw, via_header_index);
-    }
-    const u32 buffer = engine.GetBoundBuffer();
-    const u64 offset = (entry.offset + index) * sizeof(u32);
-    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
 /// Translates hardware transform feedback indices
 /// @param location Hardware location
 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
 void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
-    if (entry.is_buffer) {
-        return ImageViewType::Buffer;
-    }
-    switch (entry.type) {
-    case Tegra::Shader::TextureType::Texture1D:
-        return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
-    case Tegra::Shader::TextureType::Texture2D:
-        return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
-    case Tegra::Shader::TextureType::Texture3D:
-        return ImageViewType::e3D;
-    case Tegra::Shader::TextureType::TextureCube:
-        return entry.is_array ?
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 
0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. - // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. 
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast(num_samplers), samplers); - glBindTextures(base_texture, static_cast(num_samplers), textures); - } - if (num_images > 0) { - 
glBindImageTextures(base_image, static_cast(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const 
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. - void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case 
ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. 
-#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast(arb.size()), - reinterpret_cast(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr Shader::CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - 
std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared(); - return std::unique_ptr( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? 
transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector> contexts(num_workers); - std::vector threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? 
transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast(precompiled_entry.binary.size())); - - GLint link_status; - glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = 
registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using ProgramSharedPtr = std::shared_ptr; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static 
std::unique_ptr CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { @@ -126,36 +51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map runtime_cache; - - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/div_ceil.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Header;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::IpaSampleMode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
-
-using namespace VideoCommon::Shader;
-using namespace std::string_literals;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
-
-constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
-constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
-
-struct TextureOffset {};
-struct TextureDerivates {};
-using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-
-constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-
-constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
-    bvec2 is_nan1 = isnan(pair1);
-    bvec2 is_nan2 = isnan(pair2);
-    return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );
-)";
-
-class ShaderWriter final {
-public:
-    void AddExpression(std::string_view text) {
-        DEBUG_ASSERT(scope >= 0);
-        if (!text.empty()) {
-            AppendIndentation();
-        }
-        shader_source += text;
-    }
-
-    // Forwards all arguments directly to libfmt.
-    // Note that all formatting requirements for fmt must be
-    // obeyed when using this function (e.g. {{ must be used
-    // when printing the character '{'. Ditto for }} and '}',
-    // etc).
-    template <typename... Args>
-    void AddLine(std::string_view text, Args&&...
args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return ""; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case 
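
The Expression class above pairs a GLSL source fragment with its static type, and the As* accessors splice in a conversion helper only when the requested type differs from the stored one. A hypothetical use (register name illustrative):

    // Expression{"gpr2_main", Type::Float}.AsFloat() -> "gpr2_main"        (already a float)
    // Expression{"gpr2_main", Type::Float}.AsUint()  -> "ftou(gpr2_main)"  (bit cast inserted)
    // Expression{"gpr2_main", Type::Float}.AsInt()   -> "ftoi(gpr2_main)"
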
Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast(index) >= static_cast(Attribute::Index::TexCoord_0) && - static_cast(index) <= static_cast(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast(value + static_cast(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast(index) - static_cast(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { 
- return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
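
When no structured AST is available, DecompileBranchMode above serializes the control-flow graph behind a software program counter: each basic block becomes one switch case and a taken branch is an assignment to jmp_to followed by break. For a shader with blocks at 0x0 and 0x20, the emitted skeleton would look roughly like this (addresses and bodies hypothetical):

    constexpr const char* EXAMPLE_BRANCH_SKELETON = R"(
    uint jmp_to = 0U;
    uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0U;
    uint pbk_flow_stack[20]; uint pbk_flow_stack_top = 0U;
    while (true) {
        switch (jmp_to) {
        case 0x0U: {
            // ...block body; a taken branch is: jmp_to = 0x20U; break;
        }
        case 0x20U: {
            // ...
        }
        default: return;
        }
    }
    )";
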
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for 
(const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - 
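
DeclareRegisters and DeclarePredicates above give every live GPR and predicate a dedicated GLSL variable initialised to a neutral value. For a shader touching two registers and one predicate the preamble would look roughly like this (the per-stage name suffix is illustrative):

    constexpr const char* EXAMPLE_STATE_DECLS = R"(
    float gpr0_main = 0.0f;
    float gpr2_main = 0.0f;
    bool pred1_main = false;
    )";
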
if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = static_cast(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast(static_cast(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? 
MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? 
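
The emitted interface blocks mirror the qualifiers chosen above: constant buffers become std140 uvec4 arrays, and global memory is declared coherent volatile with readonly/writeonly added only when usage analysis allows it. A sketch of the output (binding numbers, sizes, and block names illustrative):

    constexpr const char* EXAMPLE_BUFFER_DECLS = R"(
    layout (std140, binding = 0) uniform cbuf_block_3 {
        uvec4 cbuf3[1024];
    };
    layout (std430, binding = 0) coherent volatile readonly buffer gmem_block_0 {
        uint gmem0[];
    };
    )";
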
"r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. 
- const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. 
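
On drivers where HasComponentIndexingBug() reports AMD's variable-component miscompilation, the indirect const-buffer read above is expanded into one if per component instead of a computed swizzle. The generated GLSL is roughly (temporary and buffer names illustrative):

    constexpr const char* EXAMPLE_INDEXING_WORKAROUND = R"(
    uint tmp1 = ftou(gpr0_main) >> 2;
    uvec4 tmp2 = cbuf3[tmp1 >> 2];
    uint tmp3;
    if ((tmp1 & 3) == 0) tmp3 = tmp2.x;
    if ((tmp1 & 3) == 1) tmp3 = tmp2.y;
    if ((tmp1 & 3) == 2) tmp3 = tmp2.z;
    if ((tmp1 & 3) == 3) tmp3 = tmp2.w;
    )";
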
- ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), 
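
ApplyPrecise above materializes IR nodes flagged precise into a precise-qualified temporary so the compiler cannot re-associate the arithmetic; the qualifier is dropped in fragment shaders on drivers with the known precise/texture-sampling bug. Roughly (operand names illustrative):

    constexpr const char* EXAMPLE_PRECISE = R"(
    precise float tmp1 = fma(gpr0_main, gpr1_main, gpr2_main);
    )";
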
Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && 
meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } - - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. 
- return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. 
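
ReadTextureOffset above inlines immediate offsets as integer literals because GLSL requires the offset argument of textureOffset and related builtins to be a constant expression; variable offsets are emitted only when the device reports support, and unsupported devices fall back to 0. An illustrative fetch with an immediate AOFFI pair (sampler and coordinate names illustrative):

    constexpr const char* EXAMPLE_AOFFI_FETCH = R"(
    vec4 tmp1 = textureOffset(sampler0_main, vec2(coord_x, coord_y), ivec2(-1, 1));
    )";
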
- code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template - Expression Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template - Expression 
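
FSwizzleAdd above lowers Maxwell's FSWZADD: each subgroup lane extracts a two-bit selector from the instruction mask using its lane id (gl_SubGroupInvocationARB & 3) and indexes the per-lane modifier tables declared in COMMON_DECLARATIONS. The emitted expression is roughly (operand names illustrative):

    constexpr const char* EXAMPLE_FSWZADD = R"(
    uint tmp1 = (mask >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;
    float r = ((a * fswzadd_modifiers_a[tmp1]) + (b * fswzadd_modifiers_b[tmp1]));
    )";
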
BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); - } - - template - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = 
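
The half-float merges above exploit the packed register representation: a register holding two f16 values is treated as a uint, so HMergeH0 rebuilds a vec2 that takes its low half from the source and its high half from the destination via unpackHalf2x16. Roughly (operand names illustrative):

    constexpr const char* EXAMPLE_HMERGE_H0 = R"(
    vec2 merged = vec2(unpackHalf2x16(src).x, unpackHalf2x16(dst).y);
    )";
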
GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. - return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template - Expression Logical2HLessThan(Operation operation) { - return 
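
Because GLSL's float comparisons are not reliably ordered across driver stacks, the Comparison template above pins the semantics explicitly: the ordered != case adds !isnan conjuncts, and unordered variants add isnan disjuncts so NaN operands compare true. For example:

    constexpr const char* EXAMPLE_FLOAT_COMPARISONS = R"(
    bool ne_ordered   = ((a != b) && !isnan(a) && !isnan(b));
    bool ne_unordered = ((a != b) || isnan(a) || isnan(b));
    )";
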
GenerateHalfComparison(operation, "lessThan"); - } - - template - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison(operation, "equal"); - } - - template - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison(operation, "lessThanEqual"); - } - - template - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison(operation, "greaterThan"); - } - - template - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison(operation, "notEqual"); - } - - template - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector extras{ - TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - 
UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. - return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. 
- return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - - &GLSLDecompiler::ICastFloat, - 
&GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - 
&GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast(flag); - ASSERT(index < static_cast(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string 
GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map transform_feedback; - - ShaderWriter code; - - std::optional max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - 
-ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector const_buffers; - std::vector global_memory_entries; - std::vector samplers; - std::vector images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_separate_samplers(num_separate_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - 
file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { - bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast(type)) || - !file.WriteObject(static_cast(code.size())) || - !file.WriteObject(static_cast(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast(keys.size())) || - !file.WriteObject(static_cast(bound_samplers.size())) || - !file.WriteObject(static_cast(separate_samplers.size())) || - !file.WriteObject(static_cast(bindless_samplers.size()))) { - return false; - } - - std::vector flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if 
(!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector entries; - while (static_cast(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtual precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", -
Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write 
precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector binary; -}; - -class ShaderDiskCacheOpenGL {
public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. - void BindTitleID(u64 title_id); - - /// Loads transferable cache. If the file has an old version or on failure, it deletes the file. - std::optional> LoadTransferable(); - - /// Loads current game's precompiled cache.
Invalidates on failure. - std::vector LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the precompiled cache file. Returns empty on failure. - std::optional> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and writes its header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. - bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled cache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set stored_transferable; - - /// Title ID to operate on - u64 title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf,
VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? 
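// Every entry of this descriptor type receives its own consecutively numbered
// binding holding a single descriptor, visible only to the compute stage; the
// calls below lay the set out as uniform buffers, storage buffers, uniform
// texel buffers, combined image samplers, storage texel buffers and images.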
-        for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
-            bindings.push_back({
-                .binding = binding++,
-                .descriptorType = descriptor_type,
-                .descriptorCount = 1,
-                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-                .pImmutableSamplers = nullptr,
-            });
-        }
-    };
-    add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
-    add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
-    add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
-    add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
-    add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
-    add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
-
-    return device.GetLogical().CreateDescriptorSetLayout({
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .bindingCount = static_cast<u32>(bindings.size()),
-        .pBindings = bindings.data(),
-    });
-}
-
-vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
-    return device.GetLogical().CreatePipelineLayout({
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .setLayoutCount = 1,
-        .pSetLayouts = descriptor_set_layout.address(),
-        .pushConstantRangeCount = 0,
-        .pPushConstantRanges = nullptr,
-    });
-}
-
-vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
-    std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
-    u32 binding = 0;
-    u32 offset = 0;
-    FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
-    if (template_entries.empty()) {
-        // If the shader doesn't use descriptor sets, skip template creation.
-        return {};
-    }
-
-    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
-        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
-        .pNext = nullptr,
-        .flags = 0,
-        .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
-        .pDescriptorUpdateEntries = template_entries.data(),
-        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
-        .descriptorSetLayout = *descriptor_set_layout,
-        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-        .pipelineLayout = *layout,
-        .set = DESCRIPTOR_SET,
-    });
-}
-
-vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
-    device.SaveShader(code);
-
-    return device.GetLogical().CreateShaderModule({
-        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .codeSize = code.size() * sizeof(u32),
-        .pCode = code.data(),
-    });
-}
-
-vk::Pipeline VKComputePipeline::CreatePipeline() const {
-
-    VkComputePipelineCreateInfo ci{
-        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .stage =
-            {
-                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-                .pNext = nullptr,
-                .flags = 0,
-                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
-                .module = *shader_module,
-                .pName = "main",
-                .pSpecializationInfo = nullptr,
-            },
-        .layout = *layout,
-        .basePipelineHandle = nullptr,
-        .basePipelineIndex = 0,
-    };
-
-    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
-        .pNext = nullptr,
-        .requiredSubgroupSize = GuestWarpSize,
-    };
-
-    if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
-        ci.stage.pNext = &subgroup_size_ci;
-    }
-
-    return device.GetLogical().CreateComputePipeline(ci);
-}
+ComputePipeline::~ComputePipeline() = default;

 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..433d8bb3d 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -6,7 +6,6 @@
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

 namespace Vulkan {
@@ -15,50 +14,10 @@ class Device;
 class VKScheduler;
 class VKUpdateDescriptorQueue;

-class VKComputePipeline final {
+class ComputePipeline {
 public:
-    explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
-                               VKDescriptorPool& descriptor_pool_,
-                               VKUpdateDescriptorQueue& update_descriptor_queue_,
-                               const SPIRVShader& shader_);
-    ~VKComputePipeline();
-
-    VkDescriptorSet CommitDescriptorSet();
-
-    VkPipeline GetHandle() const {
-        return *pipeline;
-    }
-
-    VkPipelineLayout GetLayout() const {
-        return *layout;
-    }
-
-    const ShaderEntries& GetEntries() const {
-        return entries;
-    }
-
-private:
-    vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
-
-    vk::PipelineLayout CreatePipelineLayout() const;
-
-    vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
-
-    vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
-
-    vk::Pipeline CreatePipeline() const;
-
-    const Device& device;
-    VKScheduler& scheduler;
-    ShaderEntries entries;
-
-    vk::DescriptorSetLayout descriptor_set_layout;
-    DescriptorAllocator descriptor_allocator;
-    VKUpdateDescriptorQueue& update_descriptor_queue;
-    vk::PipelineLayout layout;
-    vk::DescriptorUpdateTemplateKHR descriptor_template;
-    vk::ShaderModule shader_module;
-    vk::Pipeline pipeline;
+    explicit ComputePipeline();
+    ~ComputePipeline();
 };

 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
deleted file mode 100644
index fc6dd83eb..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ /dev/null
@@ -1,484 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
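For context, the descriptor set layout logic deleted above follows a simple scheme: one binding index per shader resource, grouped by descriptor type. It reduces to the following standalone sketch (a hypothetical MakeComputeSetLayout helper using the raw Vulkan C API instead of yuzu's vk:: wrappers; error handling elided, so a real caller would check the VkResult):

#include <cstdint>
#include <utility>
#include <vector>
#include <vulkan/vulkan.h>

// Builds a compute descriptor set layout where each resource type occupies a
// contiguous run of binding indices, in the order the pairs are supplied.
VkDescriptorSetLayout MakeComputeSetLayout(
    VkDevice device, const std::vector<std::pair<VkDescriptorType, uint32_t>>& counts) {
    std::vector<VkDescriptorSetLayoutBinding> bindings;
    uint32_t binding = 0;
    for (const auto& [type, count] : counts) {
        for (uint32_t i = 0; i < count; ++i) {
            bindings.push_back({
                .binding = binding++, // monotonically increasing across all types
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                .pImmutableSamplers = nullptr,
            });
        }
    }
    const VkDescriptorSetLayoutCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<uint32_t>(bindings.size()),
        .pBindings = bindings.data(),
    };
    VkDescriptorSetLayout layout = VK_NULL_HANDLE;
    vkCreateDescriptorSetLayout(device, &ci, nullptr, &layout);
    return layout;
}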
- -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, 
*descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. 
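// (Returning a null handle here is safe: CommitDescriptorSet treats a missing
// template as "no descriptors to update" and returns early.)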
- return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector vertex_bindings; - std::vector vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast(index))) { - // Skip attributes not used by the vertex shaders. 
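// (The guest can leave attributes enabled in the fixed state that the vertex shader
// never declares as inputs; emitting those would reference locations absent from
// the SPIR-V module.)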
- continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast( - dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), 
-                              extended.end());
-    }
-
-    const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
-        .pDynamicStates = dynamic_states.data(),
-    };
-
-    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
-        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
-        .pNext = nullptr,
-        .requiredSubgroupSize = GuestWarpSize,
-    };
-
-    std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
-    std::size_t module_index = 0;
-    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
-        if (!program[stage]) {
-            continue;
-        }
-
-        VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
-        stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
-        stage_ci.pNext = nullptr;
-        stage_ci.flags = 0;
-        stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
-        stage_ci.module = *modules[module_index++];
-        stage_ci.pName = "main";
-        stage_ci.pSpecializationInfo = nullptr;
-
-        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
-            stage_ci.pNext = &subgroup_size_ci;
-        }
-    }
-    return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
-        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .stageCount = static_cast<u32>(shader_stages.size()),
-        .pStages = shader_stages.data(),
-        .pVertexInputState = &vertex_input_ci,
-        .pInputAssemblyState = &input_assembly_ci,
-        .pTessellationState = &tessellation_ci,
-        .pViewportState = &viewport_ci,
-        .pRasterizationState = &rasterization_ci,
-        .pMultisampleState = &multisample_ci,
-        .pDepthStencilState = &depth_stencil_ci,
-        .pColorBlendState = &color_blend_ci,
-        .pDynamicState = &dynamic_state_ci,
-        .layout = *layout,
-        .renderPass = renderpass,
-        .subpass = 0,
-        .basePipelineHandle = nullptr,
-        .basePipelineIndex = 0,
-    });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
deleted file mode 100644
index 8b6a98fe0..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
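The dynamic-state setup above is a common Vulkan pattern: start from the core dynamic states and append the VK_EXT_extended_dynamic_state ones only when the device supports them. A minimal sketch of the pattern (the has_extended_dynamic_state flag stands in for the device.IsExtExtendedDynamicStateSupported() query used above):

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

VkPipelineDynamicStateCreateInfo MakeDynamicStateInfo(bool has_extended_dynamic_state,
                                                      std::vector<VkDynamicState>& storage) {
    // Core dynamic states that every implementation supports.
    storage = {
        VK_DYNAMIC_STATE_VIEWPORT,
        VK_DYNAMIC_STATE_SCISSOR,
        VK_DYNAMIC_STATE_DEPTH_BIAS,
    };
    if (has_extended_dynamic_state) {
        // These require VK_EXT_extended_dynamic_state to be enabled on the device.
        storage.push_back(VK_DYNAMIC_STATE_CULL_MODE_EXT);
        storage.push_back(VK_DYNAMIC_STATE_FRONT_FACE_EXT);
        storage.push_back(VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT);
    }
    return {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .dynamicStateCount = static_cast<uint32_t>(storage.size()),
        .pDynamicStates = storage.data(),
    };
}

Note that the storage vector must outlive the returned struct, since the create info only holds a pointer into it; that is why the caller owns it here.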
-
-#pragma once
-
-#include <array>
-#include <optional>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
-#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
-#include "video_core/vulkan_common/vulkan_wrapper.h"
-
-namespace Vulkan {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct GraphicsPipelineCacheKey {
-    VkRenderPass renderpass;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    FixedPipelineState fixed_state;
-
-    std::size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    std::size_t Size() const noexcept {
-        return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
-class Device;
-class VKDescriptorPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
-
-using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
-
-class VKGraphicsPipeline final {
-public:
-    explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
-                                VKDescriptorPool& descriptor_pool,
-                                VKUpdateDescriptorQueue& update_descriptor_queue_,
-                                const GraphicsPipelineCacheKey& key,
-                                vk::Span<VkDescriptorSetLayoutBinding> bindings,
-                                const SPIRVProgram& program, u32 num_color_buffers);
-    ~VKGraphicsPipeline();
-
-    VkDescriptorSet CommitDescriptorSet();
-
-    VkPipeline GetHandle() const {
-        return *pipeline;
-    }
-
-    VkPipelineLayout GetLayout() const {
-        return *layout;
-    }
-
-    GraphicsPipelineCacheKey GetCacheKey() const {
-        return cache_key;
-    }
-
-private:
-    vk::DescriptorSetLayout CreateDescriptorSetLayout(
-        vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
-
-    vk::PipelineLayout CreatePipelineLayout() const;
-
-    vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
-        const SPIRVProgram& program) const;
-
-    std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
-
-    vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
-                                u32 num_color_buffers) const;
-
-    const Device& device;
-    VKScheduler& scheduler;
-    const GraphicsPipelineCacheKey cache_key;
-    const u64 hash;
-
-    vk::DescriptorSetLayout descriptor_set_layout;
-    DescriptorAllocator descriptor_allocator;
-    VKUpdateDescriptorQueue& update_descriptor_queue;
-    vk::PipelineLayout layout;
-    vk::DescriptorUpdateTemplateKHR descriptor_template;
-    std::vector<vk::ShaderModule> modules;
-
-    vk::Pipeline pipeline;
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..7d0ba1180 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -19,49 +19,27 @@
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(GetStageFromProgram(static_cast(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(StageFromProgram(static_cast(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template -void AddBindings(std::vector& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. 
- count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings(bindings, binding, flags, entries.const_buffers); - AddBindings(bindings, binding, flags, entries.global_buffers); - AddBindings(bindings, binding, flags, entries.uniform_texels); - AddBindings(bindings, binding, flags, entries.samplers); - AddBindings(bindings, binding, flags, entries.storage_texels); - AddBindings(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); + return static_cast(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; +PipelineCache::~PipelineCache() = default; -std::array VKPipelineCache::GetShaders() { - std::array shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional cpu_addr = 
gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; -} - -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; + throw "Bad"; } -void VKPipelineCache::EmplacePipeline(std::unique_ptr pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector bindings; - - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const 
std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; -} - -template -void AddEntry(std::vector& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. - // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - AddEntry(template_entries, offset, binding, entries.const_buffers); - AddEntry(template_entries, offset, binding, entries.global_buffers); - AddEntry(template_entries, offset, binding, entries.uniform_texels); - AddEntry(template_entries, offset, binding, entries.samplers); - AddEntry(template_entries, offset, binding, entries.storage_texels); - AddEntry(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include 
"video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v); namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - std::array GetShaders(); - - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - 
VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map> - graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array GetShaderAddresses( - const std::array& shaders) { - std::array addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast(stage); - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, 
is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - 
const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - 
GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void 
Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector image_view_indices; std::array image_view_ids; diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
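For orientation, the ast.cpp removed below implemented the old decompiler's structured control-flow AST. Its core container is ASTZipper, an intrusive doubly-linked list in which every node owns its successor through a shared_ptr, remembers its predecessor, and carries a back-pointer to the list ("manager") that currently holds it; the back-pointer is what lets each operation assert that a node belongs to exactly one list before relinking its neighbours. A minimal sketch of those invariants, using simplified stand-in types (Node, Zipper) rather than yuzu's ASTBase/ASTNode:

#include <cassert>
#include <memory>

struct Zipper;

struct Node {
    std::shared_ptr<Node> next;     // owning link to the successor
    std::shared_ptr<Node> previous; // link back to the predecessor
    Zipper* manager = nullptr;      // list that currently owns this node
};

struct Zipper {
    std::shared_ptr<Node> first;
    std::shared_ptr<Node> last;

    void PushBack(std::shared_ptr<Node> node) {
        assert(node->manager == nullptr); // a node lives in at most one list
        node->previous = last;
        if (last) {
            last->next = node;
        }
        node->next.reset();
        last = node;
        if (!first) {
            first = node;
        }
        node->manager = this;
    }

    void Remove(const std::shared_ptr<Node>& node) {
        assert(node->manager == this);
        const std::shared_ptr<Node> prev = node->previous;
        const std::shared_ptr<Node> next = node->next;
        if (prev) {
            prev->next = next;
        }
        if (next) {
            next->previous = prev;
        }
        if (node == first) {
            first = next;
        }
        if (node == last) {
            last = prev;
        }
        node->next.reset();
        node->previous.reset();
        node->manager = nullptr;
    }
};

int main() {
    Zipper list;
    const auto a = std::make_shared<Node>();
    const auto b = std::make_shared<Node>();
    list.PushBack(a);
    list.PushBack(b);
    list.Remove(a);
    assert(list.first == b && list.last == b);
}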
- -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; -} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); - last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - 
const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += 
fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. - ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make(ASTNode{}); - program = std::get_if(main_node->GetInnerData()); - false_condition = MakeExpr(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. 
-void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } - if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - 
if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = 
parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
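The MoveOutward step deleted above also has a compact source-level picture: a conditional goto trapped inside a loop is replaced by a fresh guard variable, a guarded break, and a copy of the goto one nesting level up. A hedged sketch of that rewrite, where cond and the label are stand-ins for the Expr/ASTLabel machinery rather than yuzu identifiers:

#include <cstdio>

namespace {
bool cond(int i) { return i == 2; }
} // namespace

// Before: the goto escapes the loop directly.
void Before() {
    for (int i = 0; i < 4; ++i) {
        if (cond(i)) goto after;
        std::printf("iter %d\n", i);
    }
after:
    std::puts("after loop");
}

// After one outward movement: V captures the condition (ASTVarSet),
// a guarded break (ASTBreak) leaves the loop, and the goto re-emerges
// as a sibling of the loop, now conditioned on V.
void After() {
    bool v = false; // V := false, inserted before the loop
    for (int i = 0; i < 4; ++i) {
        v = cond(i); // V := cond, replacing the goto in place
        if (v) break;
        std::printf("iter %d\n", i);
    }
    if (v) goto after; // the hoisted goto
after:
    std::puts("after loop");
}

int main() {
    Before(); // iter 0, iter 1, after loop
    After();  // identical output
}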
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant; - -using ASTNode = std::shared_ptr; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { -public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template - static ASTNode Make(ASTNode parent, Args&&... 
args) { - return std::make_shared(std::move(parent), - ASTData(U(std::forward(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional GetGotoLabel() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() const { - if (const auto* inner = std::get_if(&data)) { - return inner->unused; - } - return true; - } - - std::optional GetLabelIndex() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative(data); - } - - bool IsIfElse() const { - return std::holds_alternative(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return 
gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map labels_map{}; - u32 labels_count{}; - std::vector labels{}; - std::list gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool 
AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector AsyncShaders::GetCompletedWork() { - std::vector results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector code, std::vector code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, 
*work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector code; - std::vector code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once.
We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector code, std::vector code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector code; - std::vector code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic is_thread_exiting{}; - std::vector> context_list; - std::vector worker_threads; - std::queue pending_queue; - std::vector finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -template -BlockBranchInfo MakeBranchInfo(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative(*first)) { - const auto branch = std::get_if(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector block_info; - std::list inspect_queries; - std::list queries; - std::unordered_map registered; - std::set labels; - std::map ssy_labels; - std::map pbk_labels; - std::unordered_map stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast(static_cast(index) + (negated 
? 8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template -// requires std::predicate -// requires std::invocable -std::optional TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast(instr.cbuf36.index.Value()), - static_cast(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto [buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_code.size()); - ParseInfo parse_info{}; - 
SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - 
parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *key; - u32 target = static_cast((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - static_cast(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return 
{ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo(); - const auto branch = std::get_if(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative(*block_info.branch)) { - const auto branch = std::get_if(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack& cc, std::map& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. 
-bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto* branch = std::get_if<SingleBranch>(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr<ExprCondCode>(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr<ExprPredicate>(pred); - if (negate) { - extra = MakeExpr<ExprNot>(std::move(extra)); - } - if (result) { - return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr<ExprBoolean>(true); - }; - - if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto* branch = std::get_if<SingleBranch>(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} -
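For reference, get_expr above builds the branch guard as a small expression tree: condition code first, then the (possibly negated) predicate, joined by a logical and. Assuming the Expr* node types from video_core/shader/expr.h, a branch taken on !P3 with condition code NEU comes out as ExprAnd(ExprNot(ExprPredicate(3)), ExprCondCode(NEU)), while an unconditional branch degenerates to ExprBoolean(true); predicate indices above 7 encode negation, hence the pred -= 8.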
-void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique<ShaderCharacteristics>(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - state.manager->Clear(); - } else { - auto characteristics = std::make_unique<ShaderCharacteristics>(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ?
CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <list> -#include <memory> -#include <set> -#include <variant> - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector<CaseBranch> branches{}; -}; - -using BranchData = std::variant<SingleBranch, MultiBranch>; -using BlockBranchInfo = std::shared_ptr<BranchData>; - -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end,
sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list<ShaderBlock> blocks{}; - std::set<u32> labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> -#include <limits> -#include <set> - -#include <fmt/format.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector<u32> bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list<SamplerEntry>& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits<u32>::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits<u32>::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace -
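As a worked example of the deduction above: with a 4-byte texture handler and non-bindless samplers bound at offsets 8 and 16, the sampler at base_offset 8 resolves max_offset to 16, so the deduced indexed-sampler size is ((16 - 8) * 4) / 4 = 8 entries (the * 4 converting word offsets to bytes before dividing by the handler size).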
-class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast<u32>(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ?
MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast<u32>(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative<SingleBranch>(*block.branch)) { - auto branch = std::get_if<SingleBranch>(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code.
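The scheduling words skipped next come from Maxwell's bundle layout: every fourth 64-bit slot past the start of the code carries scheduling control rather than an executable instruction. A sketch of what such a check boils down to (the real helper lives elsewhere in the tree; this version is illustrative):

    #include <cstdint>
    using u32 = std::uint32_t;

    // One scheduling word precedes every three executable instructions.
    constexpr bool IsSchedWord(u32 pc, u32 main_offset) {
        constexpr u32 sched_period = 4;
        return ((pc - main_offset) % sched_period) == 0;
    }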
- if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector<Node> tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed.
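Concretely, the wrapping below means a predicated instruction's freshly decoded nodes land in bb inside a single Conditional node: '@P0 FADD R0, R1, R2' becomes Conditional(P0, { FAdd(...) -> R0 }) and '@!P2 MOV' becomes Conditional(!P2, { ... }), while an SSY, per the comment above, is emitted bare.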
- const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast<u32>(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV has neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor?
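(For reference, the table above folds a power-of-two scale into the multiply: postfactor 1, 'Divide 2', computes (a * 0.5f) * b and postfactor 4, 'Mul 8', computes (a * 8.0f) * b, so FMUL.D2 with a = 6.0 and b = 2.0 yields 6.0. Whether that pre-scale should also be PRECISE is exactly the open question in the TODO above.)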
- Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. 
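(RRO, 'range reduction operand', nominally pre-conditions an argument for a following MUFU; treating it as a plain move is workable, presumably because the host sin/cos/exp2 functions the backends emit accept unreduced arguments.)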
- op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); - value = 
GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != 
OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { - value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, zero}; - } - }(); - 
const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.imm.entry_a)), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0,
std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); - SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader
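For reference, the eight imm_lut branches in WriteLop3Instruction above enumerate the minterms of a three-input boolean function: bit k of the LUT selects the minterm whose (a, b, c) bits spell k in binary, a high. A standalone scalar model of the same operation (illustrative, not part of the patch):

    #include <cstdint>
    using u32 = std::uint32_t;

    // Per-bit three-input LUT, as in NVIDIA's LOP3: for every bit position i,
    // build the index (a_i << 2 | b_i << 1 | c_i) and read that bit of the LUT.
    inline u32 Lop3(u32 a, u32 b, u32 c, u32 lut) {
        u32 result = 0;
        for (u32 i = 0; i < 32; ++i) {
            const u32 index = (((a >> i) & 1) << 2) | (((b >> i) & 1) << 1) | ((c >> i) & 1);
            result |= ((lut >> index) & 1) << i;
        }
        return result;
    }
    // Example: lut = 0xE8 computes the bitwise majority of a, b and c.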
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. - Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included.
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. - if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
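The swap() ladder in the BFE brev path above is the classic parallel bit reversal from the linked bithacks page; flattened into plain C++ it reads as follows (a reference model, not part of the patch):

    #include <cstdint>
    using u32 = std::uint32_t;

    // Reverse the 32 bits of v by swapping ever-larger groups in parallel:
    // adjacent bits, then bit pairs, nibbles, bytes, and finally half-words.
    inline u32 ReverseParallel(u32 v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
        return (v >> 16) | (v << 16);
    }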
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? 
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <limits>
-#include <optional>
-#include <utility>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-
-namespace {
-
-constexpr OperationCode GetFloatSelector(u64 selector) {
-    return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
-}
-
-constexpr u32 SizeInBits(Register::Size size) {
-    switch (size) {
-    case Register::Size::Byte:
-        return 8;
-    case Register::Size::Short:
-        return 16;
-    case Register::Size::Word:
-        return 32;
-    case Register::Size::Long:
-        return 64;
-    }
-    return 0;
-}
-
-constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
-                                                                   Register::Size dst_size,
-                                                                   bool src_signed,
-                                                                   bool dst_signed) {
-    const u32 dst_bits = SizeInBits(dst_size);
-    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
-        if (src_signed == dst_signed) {
-            return std::nullopt;
-        }
-        return std::make_pair(0, std::numeric_limits<s32>::max());
-    }
-    if (dst_signed) {
-        // Signed destination, clamp to [-128, 127] for instance
-        return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
-    } else {
-        // Unsigned destination
-        if (dst_bits == 32) {
-            // Avoid shifting by 32, that is undefined behavior
-            return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
-        }
-        return std::make_pair(0, (1 << dst_bits) - 1);
-    }
-}
-
-} // Anonymous namespace
-
-u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::I2I_R:
-    case OpCode::Id::I2I_C:
-    case OpCode::Id::I2I_IMM: {
-        const bool src_signed = instr.conversion.is_input_signed;
-        const bool dst_signed = instr.conversion.is_output_signed;
-        const Register::Size src_size = instr.conversion.src_size;
-        const Register::Size dst_size = instr.conversion.dst_size;
-        const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
-
-        Node value = [this, instr, opcode] {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::I2I_R:
-                return GetRegister(instr.gpr20);
-            case OpCode::Id::I2I_C:
-                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-            case OpCode::Id::I2I_IMM:
-                return Immediate(instr.alu.GetSignedImm20_20());
-            default:
-                UNREACHABLE();
-                return Immediate(0);
-            }
-        }();
-
-        // Ensure the source selector is valid
-        switch (instr.conversion.src_size) {
-        case Register::Size::Byte:
-            break;
-        case Register::Size::Short:
-            ASSERT(selector == 0 || selector == 2);
-            break;
-        default:
-            ASSERT(selector == 0);
-            break;
-        }
-
-        if (src_size != Register::Size::Word || selector != 0) {
-            value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
-                                    Immediate(selector * 8), Immediate(SizeInBits(src_size)));
-        }
-
-        value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
-                                        instr.conversion.negate_a, src_signed);
-
-        if (instr.alu.saturate_d) {
-            if (src_signed && !dst_signed) {
-                Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
-                                             Immediate(1 << (SizeInBits(src_size) - 1)));
-                value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
-                                  std::move(value));
-
-                // Simplify generated expressions, this can be removed without semantic impact
-                SetTemporary(bb, 0, std::move(value));
-                value = GetTemporary(0);
-
-                if (dst_size != Register::Size::Word) {
-                    const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
-                    Node is_large =
-                        Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
-                    value = Operation(OperationCode::Select, std::move(is_large), limit,
-                                      std::move(value));
-                }
-            } else if (const std::optional bounds =
-                           IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
-                value =
SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. 
- return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. 
-    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
-
-    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
-    const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
-
-    const Node predicate = Operation(combiner, first_pred, second_pred);
-
-    const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
-    const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
-    const Node value =
-        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
-
-    if (instr.fset.bf) {
-        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-    } else {
-        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-    }
-    SetRegister(bb, instr.gpr0, value);
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-
-u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-
-    Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
-                                      instr.fsetp.neg_a != 0);
-    Node op_b = [&]() {
-        if (instr.is_b_imm) {
-            return GetImmediate19(instr);
-        } else if (instr.is_b_gpr) {
-            return GetRegister(instr.gpr20);
-        } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-        }
-    }();
-    op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
-
-    // We can't use the constant predicate as destination.
-    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
-    const Node predicate =
-        GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
-    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
-
-    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
-    const Node value = Operation(combiner, predicate, second_pred);
-
-    // Set the primary predicate to the result of Predicate OP SecondPredicate
-    SetPredicate(bb, instr.fsetp.pred3, value);
-
-    if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-        // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
-        // if enabled
-        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
-        const Node second_value = Operation(combiner, negated_pred, second_pred);
-        SetPredicate(bb, instr.fsetp.pred0, second_value);
-    }
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
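FSETP writes the comparison combined with a second predicate to one destination, and the negated comparison combined with the same predicate to an optional second destination. A standalone sketch of that pairing (illustrative only; `combiner` stands in for the LogicalAnd/Or/Xor op the instruction selects):

    // Predicate-pair semantics: pred3 gets combiner(cmp, second),
    // pred0 (when enabled) gets combiner(!cmp, second).
    struct FsetpResult {
        bool pred3;
        bool pred0;
    };

    template <typename Combiner>
    constexpr FsetpResult EmulateFsetp(bool cmp, bool second, Combiner combiner) {
        return {combiner(cmp, second), combiner(!cmp, second)};
    }

    // Example with an AND combiner: EmulateFsetp(true, true, ...) yields {true, false}.
    static_assert(EmulateFsetp(true, true, [](bool a, bool b) { return a && b; }).pred3);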
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    PredCondition cond{};
-    bool bf = false;
-    bool ftz = false;
-    bool neg_a = false;
-    bool abs_a = false;
-    bool neg_b = false;
-    bool abs_b = false;
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::HSET2_C:
-    case OpCode::Id::HSET2_IMM:
-        cond = instr.hsetp2.cbuf_and_imm.cond;
-        bf = instr.Bit(53);
-        ftz = instr.Bit(54);
-        neg_a = instr.Bit(43);
-        abs_a = instr.Bit(44);
-        neg_b = instr.Bit(56);
-        abs_b = instr.Bit(54);
-        break;
-    case OpCode::Id::HSET2_R:
-        cond = instr.hsetp2.reg.cond;
-        bf = instr.Bit(49);
-        ftz = instr.Bit(50);
-        neg_a = instr.Bit(43);
-        abs_a = instr.Bit(44);
-        neg_b = instr.Bit(31);
-        abs_b = instr.Bit(30);
-        break;
-    default:
-        UNREACHABLE();
-    }
-
-    Node op_b = [this, instr, opcode] {
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HSET2_C:
-            // Flag this as unimplemented since the path is untested.
-            UNIMPLEMENTED_MSG("HSET2_C is not implemented");
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-        case OpCode::Id::HSET2_R:
-            return GetRegister(instr.gpr20);
-        case OpCode::Id::HSET2_IMM:
-            return UnpackHalfImmediate(instr, true);
-        default:
-            UNREACHABLE();
-            return Node{};
-        }
-    }();
-
-    if (!ftz) {
-        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
-    }
-
-    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
-    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::HSET2_R:
-        op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
-        [[fallthrough]];
-    case OpCode::Id::HSET2_C:
-        op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
-        break;
-    default:
-        break;
-    }
-
-    Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
-
-    Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
-
-    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
-
-    // HSET2 operates on each half float in the pack.
-    std::array<Node, 2> values;
-    for (u32 i = 0; i < 2; ++i) {
-        const u32 raw_value = bf ? 0x3c00 : 0xffff;
-        Node true_value = Immediate(raw_value << (i * 16));
-        Node false_value = Immediate(0);
-
-        Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
-        Node predicate = Operation(combiner, comparison, second_pred);
-        values[i] =
-            Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
-    }
-
-    Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
-    SetRegister(bb, instr.gpr0, move(value));
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
-
-#include <tuple>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::HalfPrecision;
-using Tegra::Shader::HalfType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
-        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
-    } else {
-        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
-    }
-
-    constexpr auto identity = HalfType::H0_H1;
-    bool neg_b{}, neg_c{};
-    auto [saturate, type_b, op_b, type_c,
-          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HFMA2_CR:
-            neg_b = instr.hfma2.negate_b;
-            neg_c = instr.hfma2.negate_c;
-            return {instr.hfma2.saturate, HalfType::F32,
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
-                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
-        case OpCode::Id::HFMA2_RC:
-            neg_b = instr.hfma2.negate_b;
-            neg_c = instr.hfma2.negate_c;
-            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
-                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
-        case OpCode::Id::HFMA2_RR:
-            neg_b = instr.hfma2.rr.negate_b;
-            neg_c = instr.hfma2.rr.negate_c;
-            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
-                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
-        case OpCode::Id::HFMA2_IMM_R:
-            neg_c = instr.hfma2.negate_c;
-            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
-                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
-        default:
-            return {false, identity, Immediate(0), identity, Immediate(0)};
-        }
-    }();
-
-    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
-    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
-    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
-
-    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
-    value = GetSaturatedHalfFloat(value, saturate);
-    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
-
-    SetRegister(bb, instr.gpr0, value);
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 
32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, - Node original_value) { - switch (component_type) { - case 
ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto [converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? 
GetImage(instr.image, type)
-                                               : GetBindlessImage(instr.gpr39, type)};
-        image.MarkWrite();
-
-        MetaImage meta{image, std::move(values)};
-        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
-        break;
-    }
-    case OpCode::Id::SUATOM: {
-        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
-
-        const OperationCode operation_code = [instr] {
-            switch (instr.suatom_d.operation_type) {
-            case Tegra::Shader::ImageAtomicOperationType::S32:
-            case Tegra::Shader::ImageAtomicOperationType::U32:
-                switch (instr.suatom_d.operation) {
-                case Tegra::Shader::ImageAtomicOperation::Add:
-                    return OperationCode::AtomicImageAdd;
-                case Tegra::Shader::ImageAtomicOperation::And:
-                    return OperationCode::AtomicImageAnd;
-                case Tegra::Shader::ImageAtomicOperation::Or:
-                    return OperationCode::AtomicImageOr;
-                case Tegra::Shader::ImageAtomicOperation::Xor:
-                    return OperationCode::AtomicImageXor;
-                case Tegra::Shader::ImageAtomicOperation::Exch:
-                    return OperationCode::AtomicImageExchange;
-                default:
-                    break;
-                }
-                break;
-            default:
-                break;
-            }
-            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
-                              static_cast<u64>(instr.suatom_d.operation.Value()),
-                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
-            return OperationCode::AtomicImageAdd;
-        }();
-
-        Node value = GetRegister(instr.gpr0);
-
-        const auto type = instr.suatom_d.image_type;
-        auto& image = GetImage(instr.image, type);
-        image.MarkAtomic();
-
-        MetaImage meta{image, {std::move(value)}};
-        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
-    }
-
-    return pc;
-}
-
-ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
-    const auto offset = static_cast<u64>(image.index.Value());
-
-    const auto it =
-        std::find_if(std::begin(used_images), std::end(used_images),
-                     [offset](const ImageEntry& entry) { return entry.offset == offset; });
-    if (it != std::end(used_images)) {
-        ASSERT(!it->is_bindless && it->type == type);
-        return *it;
-    }
-
-    const auto next_index = static_cast<u32>(used_images.size());
-    return used_images.emplace_back(next_index, offset, type);
-}
-
-ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
-    const Node image_register = GetRegister(reg);
-    const auto result =
-        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
-
-    const auto buffer = std::get<1>(result);
-    const auto offset = std::get<2>(result);
-
-    const auto it = std::find_if(std::begin(used_images), std::end(used_images),
-                                 [buffer, offset](const ImageEntry& entry) {
-                                     return entry.buffer == buffer && entry.offset == offset;
-                                 });
-    if (it != std::end(used_images)) {
-        ASSERT(it->is_bindless && it->type == type);
-        return *it;
-    }
-
-    const auto next_index = static_cast<u32>(used_images.size());
-    return used_images.emplace_back(next_index, offset, buffer, type);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
- -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. 
-    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
-
-    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
-    const Node predicate =
-        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
-
-    // Set the primary predicate to the result of Predicate OP SecondPredicate
-    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
-    const Node value = Operation(combiner, predicate, second_pred);
-    SetPredicate(bb, instr.isetp.pred3, value);
-
-    if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
-        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
-        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
-    }
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <utility>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/logging/log.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using std::move;
-using Tegra::Shader::AtomicOp;
-using Tegra::Shader::AtomicType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::GlobalAtomicType;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::StoreType;
-
-namespace {
-
-OperationCode GetAtomOperation(AtomicOp op) {
-    switch (op) {
-    case AtomicOp::Add:
-        return OperationCode::AtomicIAdd;
-    case AtomicOp::Min:
-        return OperationCode::AtomicIMin;
-    case AtomicOp::Max:
-        return OperationCode::AtomicIMax;
-    case AtomicOp::And:
-        return OperationCode::AtomicIAnd;
-    case AtomicOp::Or:
-        return OperationCode::AtomicIOr;
-    case AtomicOp::Xor:
-        return OperationCode::AtomicIXor;
-    case AtomicOp::Exch:
-        return OperationCode::AtomicIExchange;
-    default:
-        UNIMPLEMENTED_MSG("op={}", op);
-        return OperationCode::AtomicIAdd;
-    }
-}
-
-bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
-    return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
-           uniform_type == Tegra::Shader::UniformType::UnsignedShort;
-}
-
-u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
-    switch (uniform_type) {
-    case Tegra::Shader::UniformType::UnsignedByte:
-        return 0b11;
-    case Tegra::Shader::UniformType::UnsignedShort:
-        return 0b10;
-    default:
-        UNREACHABLE();
-        return 0;
-    }
-}
-
-u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
-    switch (uniform_type) {
-    case Tegra::Shader::UniformType::UnsignedByte:
-        return 8;
-    case Tegra::Shader::UniformType::UnsignedShort:
-        return 16;
-    case Tegra::Shader::UniformType::Single:
-        return 32;
-    case Tegra::Shader::UniformType::Double:
-        return 64;
-    case Tegra::Shader::UniformType::Quad:
-    case Tegra::Shader::UniformType::UnsignedQuad:
-        return 128;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
-        return 32;
-    }
-}
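The ExtractUnaligned helper just below turns a byte- or short-granular address into a bit offset inside the aligned 32-bit word it falls in. A standalone sketch of the same arithmetic (illustration only, not part of the patch):

    #include <cstdint>

    // mask is 0b11 for byte accesses, 0b10 for shorts; size is 8 or 16 bits.
    constexpr std::uint32_t ExtractUnaligned(std::uint32_t word, std::uint32_t address,
                                             std::uint32_t mask, std::uint32_t size) {
        const std::uint32_t offset = (address & mask) * 8; // misalignment in bits
        return (word >> offset) & ((1u << size) - 1u);
    }
    static_assert(ExtractUnaligned(0xAABBCCDDu, 0x1002, 0b11, 8) == 0xBBu);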
-
-Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
-    Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
-}
-
-Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
-    Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
-                     Immediate(size));
-}
-
-Node Sign16Extend(Node value) {
-    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
-    Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
-    Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
-    return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
-}
-
-} // Anonymous namespace
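DecodeMemory below sizes its register writes as Common::AlignUp(size, 32) / 32, i.e. one destination register per started 32-bit word. The same arithmetic spelled out in a standalone sketch (illustrative only):

    #include <cstdint>

    // 8- and 16-bit loads still occupy one register; 64-bit loads two; 128-bit four.
    constexpr std::uint32_t RegisterCount(std::uint32_t size_in_bits) {
        return (size_in_bits + 31) / 32; // equivalent to AlignUp(size, 32) / 32
    }
    static_assert(RegisterCount(8) == 1 && RegisterCount(64) == 2 && RegisterCount(128) == 4);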
-
-u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::LD_A: {
-        // Note: Shouldn't this be flat interpolation mode? As in, no interpolation performed.
-        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
-                             "Indirect attribute loads are not supported");
-        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
-                             "Unaligned attribute loads are not supported");
-        UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
-                                 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
-                             "Non-32 bits PHYS reads are not implemented");
-
-        const Node buffer{GetRegister(instr.gpr39)};
-
-        u64 next_element = instr.attribute.fmt20.element;
-        auto next_index = static_cast<u32>(instr.attribute.fmt20.index.Value());
-
-        const auto LoadNextElement = [&](u32 reg_offset) {
-            const Node attribute{instr.attribute.fmt20.IsPhysical()
-                                     ? GetPhysicalInputAttribute(instr.gpr8, buffer)
-                                     : GetInputAttribute(static_cast<Attribute::Index>(next_index),
-                                                         next_element, buffer)};
-
-            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
-
-            // Load the next attribute element into the following register. If the element
-            // to load goes beyond the vec4 size, load the first element of the next
-            // attribute.
-            next_element = (next_element + 1) % 4;
-            next_index = next_index + (next_element == 0 ? 1 : 0);
-        };
-
-        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-            LoadNextElement(reg_offset);
-        }
-        break;
-    }
-    case OpCode::Id::LD_C: {
-        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
-
-        Node index = GetRegister(instr.gpr8);
-
-        const Node op_a =
-            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
-
-        switch (instr.ld_c.type.Value()) {
-        case Tegra::Shader::UniformType::Single:
-            SetRegister(bb, instr.gpr0, op_a);
-            break;
-
-        case Tegra::Shader::UniformType::Double: {
-            const Node op_b =
-                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
-
-            SetTemporary(bb, 0, op_a);
-            SetTemporary(bb, 1, op_b);
-            SetRegister(bb, instr.gpr0, GetTemporary(0));
-            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
-        }
-        break;
-    }
-    case OpCode::Id::LD_L:
-        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
-        [[fallthrough]];
-    case OpCode::Id::LD_S: {
-        const auto GetAddress = [&](s32 offset) {
-            ASSERT(offset % 4 == 0);
-            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
-            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
-        };
-        const auto GetMemory = [&](s32 offset) {
-            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
-                                                             : GetLocalMemory(GetAddress(offset));
-        };
-
-        switch (instr.ldst_sl.type.Value()) {
-        case StoreType::Signed16:
-            SetRegister(bb, instr.gpr0,
-                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
-            break;
-        case StoreType::Bits32:
-        case StoreType::Bits64:
-        case StoreType::Bits128: {
-            const u32 count = [&] {
-                switch (instr.ldst_sl.type.Value()) {
-                case StoreType::Bits32:
-                    return 1;
-                case StoreType::Bits64:
-                    return 2;
-                case StoreType::Bits128:
-                    return 4;
-                default:
-                    UNREACHABLE();
-                    return 0;
-                }
-            }();
-            for (u32 i = 0; i < count; ++i) {
-                SetTemporary(bb, i, GetMemory(i * 4));
-            }
-            for (u32 i = 0; i < count; ++i) {
-                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
-            }
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
-                              instr.ldst_sl.type.Value());
-        }
-        break;
-    }
-    case OpCode::Id::LD:
-    case OpCode::Id::LDG: {
-        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::LD:
-                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
-                return instr.generic.type;
-            case OpCode::Id::LDG:
-                return instr.ldg.type;
-            default:
-                UNREACHABLE();
-                return {};
-            }
-        }();
-
-        const auto [real_address_base, base_address, descriptor] =
-            TrackGlobalMemory(bb, instr, true, false);
-
-        const u32 size = GetMemorySize(type);
-        const u32 count = Common::AlignUp(size, 32) / 32;
-        if (!real_address_base || !base_address) {
-            // Tracking failed, load zeroes.
-            for (u32 i = 0; i < count; ++i) {
-                SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
-            }
-            break;
-        }
-
-        for (u32 i = 0; i < count; ++i) {
-            const Node it_offset = Immediate(i * 4);
-            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
-            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
-            // To handle unaligned loads, get the bytes used to dereference global memory and
-            // extract those bytes from the loaded u32.
-            if (IsUnaligned(type)) {
-                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
-            }
-
-            SetTemporary(bb, i, gmem);
-        }
-
-        for (u32 i = 0; i < count; ++i) {
-            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
-        }
-        break;
-    }
-    case OpCode::Id::ST_A: {
-        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
-                             "Indirect attribute stores are not supported");
-        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
-                             "Unaligned attribute stores are not supported");
-
-        u64 element = instr.attribute.fmt20.element;
-        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-
-        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-            Node dest;
-            if (instr.attribute.fmt20.patch) {
-                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
-                dest = MakeNode<PatchNode>(offset);
-            } else {
-                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
-                                          GetRegister(instr.gpr39));
-            }
-            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
-
-            bb.push_back(Operation(OperationCode::Assign, dest, src));
-
-            // Advance to the next attribute element. If the element to store goes beyond the
-            // vec4 size, move to the first element of the next attribute.
-            element = (element + 1) % 4;
-            index = index + (element == 0 ? 1 : 0);
-        }
-        break;
-    }
-    case OpCode::Id::ST_L:
-        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
-        [[fallthrough]];
-    case OpCode::Id::ST_S: {
-        const auto GetAddress = [&](s32 offset) {
-            ASSERT(offset % 4 == 0);
-            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
-            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
-        };
-
-        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
-        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
-        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
-
-        switch (instr.ldst_sl.type.Value()) {
-        case StoreType::Bits128:
-            (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
-            (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
-            [[fallthrough]];
-        case StoreType::Bits64:
-            (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
-            [[fallthrough]];
-        case StoreType::Bits32:
-            (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
-            break;
-        case StoreType::Unsigned16:
-        case StoreType::Signed16: {
-            Node address = GetAddress(0);
-            Node memory = (this->*get_memory)(address);
-            (this->*set_memory)(
-                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
-                              instr.ldst_sl.type.Value());
-        }
-        break;
-    }
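Sub-word shared/local stores above are emulated as a read-modify-write of the containing 32-bit word. A standalone sketch of that pattern for the 16-bit case (illustration only, not part of the patch):

    #include <cstdint>

    // Write a 16-bit value at a 2-byte-aligned address inside an aligned word.
    constexpr std::uint32_t StoreShort(std::uint32_t word, std::uint32_t value,
                                       std::uint32_t address) {
        const std::uint32_t offset = (address & 0b10u) * 8; // 0 or 16 bits
        const std::uint32_t mask = 0xFFFFu << offset;
        return (word & ~mask) | ((value & 0xFFFFu) << offset);
    }
    static_assert(StoreShort(0xAAAABBBBu, 0x1234u, 0x2) == 0x1234BBBBu);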
- const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. - const Node fixed_address{Immediate(static_cast(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. 
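One detail worth keeping in mind about the atomic cases above: RED and ATOM perform the same read-modify-write on global memory, but RED discards the old value while ATOM returns it, which is why only ATOM ends in SetRegister. A behavioural sketch in std::atomic terms, for the U32 add case only (not the emitted IR):

#include <atomic>
#include <cstdint>

void RedAdd(std::atomic<uint32_t>& gmem, uint32_t value) {
    gmem.fetch_add(value);  // RED: reduction only, no register result
}

uint32_t AtomAdd(std::atomic<uint32_t>& gmem, uint32_t value) {
    return gmem.fetch_add(value);  // ATOM: the old value is written to gpr0
}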
- SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. - pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? 
If we assume this condition is not - satisfied, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = 
Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. 
- if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast(index) - static_cast(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
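Stepping back to the flow-control opcodes above: SSY/SYNC and PBK/BRK behave as two small address stacks, which is what the PushFlowStack/PopFlowStack pair models. A behavioural sketch, with the two MetaStackClass stacks split out (illustrative structure, not the recompiler's):

#include <cstdint>
#include <stack>

struct FlowStacks {
    std::stack<uint32_t> ssy;  // reconvergence targets pushed by SSY
    std::stack<uint32_t> pbk;  // break targets pushed by PBK

    void PushSsy(uint32_t target) { ssy.push(target); }
    void PushPbk(uint32_t target) { pbk.push(target); }
    // SYNC jumps to the most recent SSY target, BRK to the most recent PBK one.
    uint32_t PopSsy() { const uint32_t t = ssy.top(); ssy.pop(); return t; }
    uint32_t PopPbk() { const uint32_t t = pbk.top(); pbk.pop(); return t; }
};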
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
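Before the PSET decoder below, it helps to see the PSETP case above in scalar form: the primary predicate receives (a OP b) OP second, and the optional secondary predicate repeats the expression with the partial result negated. A sketch with the combiner passed in (illustrative, not ShaderIR code):

#include <functional>

void PsetpSketch(bool a, bool b, bool second,
                 const std::function<bool(bool, bool)>& op,
                 bool& pred3, bool& pred0, bool write_pred0) {
    const bool partial = op(a, b);
    pred3 = op(partial, second);       // primary predicate
    if (write_pred0) {
        pred0 = op(!partial, second);  // secondary predicate, negated input
    }
}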
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? 
GetInternalFlag(static_cast(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); 
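What ShiftRight is assembling here is a 64-bit funnel shift evaluated with 32-bit operations. A scalar sketch of the unsigned path, mirroring the Select chain completed below; note that shift == 0 resolves to the high word, exactly as the final Select does, and the signed variant differs only in arithmetic-shifting high:

#include <cstdint>

uint32_t FunnelShiftRightSketch(uint32_t low, uint32_t high, uint32_t shift) {
    if (shift == 0) {
        return high;                  // the final is_zero Select below
    }
    if (shift < 32) {                 // the "less" path: both words contribute
        return (high << (32 - shift)) | (low >> shift);
    }
    if (shift < 64) {                 // the "greater" path: only high remains
        return high >> (shift - 32);
    }
    return 0;                         // a full 64-bit shift; IsFull selects 0
}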
- Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? 
WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - const Node4 
components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector coords; - std::vector aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - } else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional sampler = - is_bindless ? 
GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector coords; - std::vector derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 
1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. 
- const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. 
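All three bindless flavours above finish with the same lookup-or-create step over used_samplers. In miniature (illustrative fields, not the real SamplerEntry):

#include <cstdint>
#include <vector>

struct EntrySketch {
    uint32_t index;   // insertion order, used as the binding index
    uint32_t offset;  // key: constant buffer offset of the sampler handle
};

EntrySketch& FindOrCreate(std::vector<EntrySketch>& used, uint32_t offset) {
    for (EntrySketch& entry : used) {
        if (entry.offset == offset) {
            return entry;  // reuse the existing mapping
        }
    }
    // Otherwise create a new mapping for this sampler.
    return used.emplace_back(
        EntrySketch{static_cast<uint32_t>(used.size()), offset});
}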
- const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporaries, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destination registers are packed in two registers in pairs (just like any half - float instruction). 
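The packing described above is a plain 16/16 split per destination register; assuming the components have already been converted to IEEE half bits, the scalar equivalent of one HPack2 is:

#include <cstdint>

uint32_t HPack2Sketch(uint16_t low_half, uint16_t high_half) {
    return static_cast<uint32_t>(low_half) |
           (static_cast<uint32_t>(high_half) << 16);
}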
- - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector aoffi, - std::optional bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 
1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - std::vector aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 
1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? Immediate(static_cast(instr.tld4_b.component)) - : Immediate(static_cast(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? 
instr.gpr20.Value() : instr.gpr8.Value();
-
-    const u64 last_coord_register =
-        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
-            ? static_cast<u64>(instr.gpr20.Value())
-            : coord_register + 1;
-
-    std::vector<Node> coords;
-    for (std::size_t i = 0; i < type_coord_count; ++i) {
-        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
-        coords.push_back(
-            GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
-    }
-
-    const Node array = is_array ? GetRegister(array_register) : nullptr;
-    // When LOD is used, it is always stored in gpr20
-    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
-
-    std::vector<Node> aoffi;
-    if (aoffi_enabled) {
-        aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
-    }
-
-    Node4 values;
-    for (u32 element = 0; element < values.size(); ++element) {
-        auto coords_copy = coords;
-        MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
-        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
-    }
-    return values;
-}
-
-std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
-    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
-    std::size_t max_coords, std::size_t max_inputs) {
-    const std::size_t coord_count = GetCoordCount(texture_type);
-
-    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
-    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
-        UNIMPLEMENTED_MSG("Unsupported Texture operation");
-        total_coord_count = std::min(total_coord_count, max_coords);
-    }
-    // For 1D.DC, OpenGL uses a vec3, but the second component is ignored later.
-    total_coord_count +=
-        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
-
-    return {coord_count, total_coord_count};
-}
-
-std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
-                                                bool is_tld4) {
-    const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
-    const u32 size = is_tld4 ? 6 : 4;
-    const s32 wrap_value = is_tld4 ? 32 : 8;
-    const s32 diff_value = is_tld4 ? 64 : 16;
-    const u32 mask = (1U << size) - 1;
-
-    std::vector<Node> aoffi;
-    aoffi.reserve(coord_count);
-
-    const auto aoffi_immediate{
-        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
-    if (!aoffi_immediate) {
-        // Variable access, not supported on AMD.
-        LOG_WARNING(HW_GPU,
-                    "AOFFI constant folding failed, some hardware might have graphical issues");
-        for (std::size_t coord = 0; coord < coord_count; ++coord) {
-            const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
-            const Node condition =
-                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
-            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
-            aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
-        }
-        return aoffi;
-    }
-
-    for (std::size_t coord = 0; coord < coord_count; ++coord) {
-        s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
-        if (value >= wrap_value) {
-            value -= diff_value;
-        }
-        aoffi.push_back(Immediate(value));
-    }
-    return aoffi;
-}
-
-std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
-    static constexpr u32 num_entries = 8;
-
-    std::vector<Node> ptp;
-    ptp.reserve(num_entries);
-
-    const auto global_size = static_cast<s64>(global_code.size());
-    const std::optional<u32> low = TrackImmediate(ptp_regs[0], global_code, global_size);
-    const std::optional<u32> high = TrackImmediate(ptp_regs[1], global_code, global_size);
-    if (!low || !high) {
-        for (u32 entry = 0; entry < num_entries; ++entry) {
-            const u32 reg = entry / 4;
-            const u32 offset = entry % 4;
-            const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
-            const Node condition =
-                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
-            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
-            ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
-        }
-        return ptp;
-    }
-
-    const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
-    for (u32 entry = 0; entry < num_entries; ++entry) {
-        s32 value = (immediate >> (entry * 8)) & 0b111111;
-        if (value >= 32) {
-            value -= 64;
-        }
-        ptp.push_back(Immediate(value));
-    }
-
-    return ptp;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
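The AOFFI constant folding removed above packs one signed offset per coordinate into a single source register: 4-bit fields at bit offsets 0/4/8 for ordinary samples, 6-bit fields at 0/8/16 for TLD4. A minimal standalone sketch of that two's-complement unpacking; the function name and example value are illustrative, not part of the deleted code:

    #include <cstdint>
    #include <vector>

    // Decodes packed AOFFI texture offsets; mirrors the constant-folded path above.
    std::vector<int32_t> UnpackAoffi(uint32_t packed, size_t coord_count, bool is_tld4) {
        const uint32_t offsets[3] = {0u, is_tld4 ? 8u : 4u, is_tld4 ? 16u : 8u};
        const uint32_t size = is_tld4 ? 6 : 4;  // field width in bits
        const int32_t wrap = is_tld4 ? 32 : 8;  // first field value that is negative
        const int32_t diff = is_tld4 ? 64 : 16; // 2^size, subtracted to sign-extend
        const uint32_t mask = (1u << size) - 1;

        std::vector<int32_t> result;
        result.reserve(coord_count);
        for (size_t coord = 0; coord < coord_count; ++coord) {
            int32_t value = static_cast<int32_t>((packed >> offsets[coord]) & mask);
            if (value >= wrap) {
                value -= diff; // manual sign extension of the small field
            }
            result.push_back(value);
        }
        return result;
    }
    // Example: UnpackAoffi(0b1111'0001, 2, false) yields {1, -1}.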
- -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast(instr.alu.GetImm20_16()); - return Immediate(static_cast(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. 
- ASSERT(instr.vsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? 
OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? 
Immediate(static_cast(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = Immediate(static_cast(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
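The SHFL decode above derives the source lane from the packed mask operand: the low 5 bits are the clamp value and bits 8..23 the segment mask. A scalar sketch of the same selection logic; the names are illustrative, and the u32 wrap-around deliberately mirrors the IR's integer operations:

    #include <cstdint>

    enum class Shuffle { Idx, Up, Down, Bfly };

    // Scalar model of the SHFL source-lane selection deleted above.
    uint32_t ShflSourceLane(uint32_t thread_id, uint32_t index, uint32_t mask, Shuffle op,
                            bool& in_bounds) {
        const uint32_t clamp = mask & 0x1Fu;             // low 5 bits
        const uint32_t seg_mask = (mask >> 8) & 0xFFFFu; // BitfieldExtract(mask, 8, 16)
        const uint32_t min_id = thread_id & seg_mask;    // first lane of the segment
        const uint32_t max_id = min_id | (clamp & ~seg_mask); // last addressable lane

        uint32_t src = 0;
        switch (op) {
        case Shuffle::Idx:  src = (index & ~seg_mask) | min_id; break;
        case Shuffle::Down: src = thread_id + index; break;
        case Shuffle::Up:   src = thread_id - index; break;
        case Shuffle::Bfly: src = thread_id ^ index; break;
        }
        // Up compares against the segment start, every other mode against the end.
        in_bounds = (op == Shuffle::Up) ? (src >= min_id) : (src <= max_id);
        return src;
    }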
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::PredCondition;
-
-u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    UNIMPLEMENTED_IF(instr.xmad.sign_a);
-    UNIMPLEMENTED_IF(instr.xmad.sign_b);
-    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                         "Condition codes generation in XMAD is not implemented");
-
-    Node op_a = GetRegister(instr.gpr8);
-
-    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
-    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
-    const bool is_signed_a = instr.xmad.sign_a == 1;
-    const bool is_signed_b = instr.xmad.sign_b == 1;
-    const bool is_signed_c = is_signed_a;
-
-    auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
-          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::XMAD_CR:
-            return {instr.xmad.merge_56,
-                    instr.xmad.product_shift_left_second,
-                    instr.xmad.high_b,
-                    instr.xmad.mode_cbf,
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
-                    GetRegister(instr.gpr39)};
-        case OpCode::Id::XMAD_RR:
-            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
-                    instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
-        case OpCode::Id::XMAD_RC:
-            return {false,
-                    false,
-                    instr.xmad.high_b,
-                    instr.xmad.mode_cbf,
-                    GetRegister(instr.gpr39),
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
-        case OpCode::Id::XMAD_IMM:
-            return {instr.xmad.merge_37,
-                    instr.xmad.product_shift_left,
-                    false,
-                    instr.xmad.mode,
-                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
-                    GetRegister(instr.gpr39)};
-        default:
-            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
-            return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
-        }
-    }();
-
-    op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
-                           instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
-
-    const Node original_b = op_b_binding;
-    const Node op_b =
-        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
-                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
-
-    // sign_a and sign_b are already checked for equality above, so using either one here is fine.
-    Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
-    if (is_psl) {
-        product =
-            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
-    }
-    SetTemporary(bb, 0, product);
-    product = GetTemporary(0);
-
-    Node original_c = op_c;
-    const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error
-    op_c = [&] {
-        switch (set_mode) {
-        case Tegra::Shader::XmadMode::None:
-            return original_c;
-        case Tegra::Shader::XmadMode::CLo:
-            return BitfieldExtract(std::move(original_c), 0, 16);
-        case Tegra::Shader::XmadMode::CHi:
-            return BitfieldExtract(std::move(original_c), 16, 16);
-        case Tegra::Shader::XmadMode::CBcc: {
-            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
-                                             original_b, Immediate(16));
-            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
-                                   std::move(shifted_b));
-        }
-        case Tegra::Shader::XmadMode::CSfu: {
-            const Node comp_a =
-                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
-            const Node comp_b =
-                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
-            const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
-
-            const Node comp_minus_a = GetPredicateComparisonInteger(
-                PredCondition::NE, is_signed_a,
-                SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
-                                Immediate(0x80000000)),
-                Immediate(0));
-            const Node comp_minus_b = GetPredicateComparisonInteger(
-                PredCondition::NE, is_signed_b,
-                SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
-                                Immediate(0x80000000)),
-                Immediate(0));
-
-            Node new_c = Operation(
-                OperationCode::Select, comp_minus_a,
-                SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
-                original_c);
-            new_c = Operation(
-                OperationCode::Select, comp_minus_b,
-                SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
-                std::move(new_c));
-
-            return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
-        }
-        default:
-            UNREACHABLE();
-            return Immediate(0);
-        }
-    }();
-
-    SetTemporary(bb, 1, op_c);
-    op_c = GetTemporary(1);
-
-    // TODO(Rodrigo): Use an appropriate sign for this operation
-    Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
-    SetTemporary(bb, 2, sum);
-    sum = GetTemporary(2);
-    if (is_merge) {
-        const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
-                                       Immediate(0), Immediate(16));
-        const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
-                                       Immediate(16));
-        sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
-    }
-
-    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
-    SetRegister(bb, instr.gpr0, std::move(sum));
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
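XMAD above is a 16x16-bit multiply with a 32-bit accumulate whose third operand is reshaped by a mode field. As a scalar reference for the unsigned paths, a simplified sketch (hypothetical helper; the CSfu select chain, the merge step, and the signed variants are omitted for brevity):

    #include <cstdint>

    enum class XmadMode { None, CLo, CHi, CBcc };

    // Scalar reference for the unsigned XMAD paths decoded above (illustrative only).
    uint32_t Xmad(uint32_t a, uint32_t b, uint32_t c, bool high_a, bool high_b,
                  bool psl, XmadMode mode) {
        const uint32_t op_a = (high_a ? (a >> 16) : a) & 0xFFFFu;
        const uint32_t op_b = (high_b ? (b >> 16) : b) & 0xFFFFu;
        uint32_t product = op_a * op_b;
        if (psl) {
            product <<= 16; // "product shift left"
        }
        uint32_t op_c = c;
        switch (mode) {
        case XmadMode::None: break;
        case XmadMode::CLo:  op_c = c & 0xFFFFu; break;
        case XmadMode::CHi:  op_c = c >> 16; break;
        case XmadMode::CBcc: op_c = c + (b << 16); break; // adds the shifted original b
        }
        return product + op_c;
    }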
-
-#include <memory>
-#include <variant>
-
-#include "video_core/shader/expr.h"
-
-namespace VideoCommon::Shader {
-namespace {
-bool ExprIsBoolean(const Expr& expr) {
-    return std::holds_alternative<ExprBoolean>(*expr);
-}
-
-bool ExprBooleanGet(const Expr& expr) {
-    return std::get_if<ExprBoolean>(expr.get())->value;
-}
-} // Anonymous namespace
-
-bool ExprAnd::operator==(const ExprAnd& b) const {
-    return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprAnd::operator!=(const ExprAnd& b) const {
-    return !operator==(b);
-}
-
-bool ExprOr::operator==(const ExprOr& b) const {
-    return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
-}
-
-bool ExprOr::operator!=(const ExprOr& b) const {
-    return !operator==(b);
-}
-
-bool ExprNot::operator==(const ExprNot& b) const {
-    return *operand1 == *b.operand1;
-}
-
-bool ExprNot::operator!=(const ExprNot& b) const {
-    return !operator==(b);
-}
-
-Expr MakeExprNot(Expr first) {
-    if (std::holds_alternative<ExprNot>(*first)) {
-        return std::get_if<ExprNot>(first.get())->operand1;
-    }
-    return MakeExpr<ExprNot>(std::move(first));
-}
-
-Expr MakeExprAnd(Expr first, Expr second) {
-    if (ExprIsBoolean(first)) {
-        return ExprBooleanGet(first) ? second : first;
-    }
-    if (ExprIsBoolean(second)) {
-        return ExprBooleanGet(second) ? first : second;
-    }
-    return MakeExpr<ExprAnd>(std::move(first), std::move(second));
-}
-
-Expr MakeExprOr(Expr first, Expr second) {
-    if (ExprIsBoolean(first)) {
-        return ExprBooleanGet(first) ? first : second;
-    }
-    if (ExprIsBoolean(second)) {
-        return ExprBooleanGet(second) ? second : first;
-    }
-    return MakeExpr<ExprOr>(std::move(first), std::move(second));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second) {
-    return (*first) == (*second);
-}
-
-bool ExprAreOpposite(const Expr& first, const Expr& second) {
-    if (std::holds_alternative<ExprNot>(*first)) {
-        return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
-    }
-    if (std::holds_alternative<ExprNot>(*second)) {
-        return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
-    }
-    return false;
-}
-
-bool ExprIsTrue(const Expr& first) {
-    if (ExprIsBoolean(first)) {
-        return ExprBooleanGet(first);
-    }
-    return false;
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
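The fold helpers above keep control-flow expressions simplified by short-circuiting boolean constants and double negations at construction time. A small usage sketch, using the declarations from expr.h below:

    // Boolean constants and double negations fold away as the tree is built.
    const Expr pred = MakeExpr<ExprPredicate>(0u);
    const Expr folded_and = MakeExprAnd(MakeExpr<ExprBoolean>(true), pred); // yields pred itself
    const Expr folded_not = MakeExprNot(MakeExprNot(pred));                 // also yields pred
    // ExprAreOpposite(MakeExprNot(pred), pred) holds without any deep rewriting.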
-
-#pragma once
-
-#include <memory>
-#include <variant>
-
-#include "video_core/engines/shader_bytecode.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::ConditionCode;
-using Tegra::Shader::Pred;
-
-class ExprAnd;
-class ExprBoolean;
-class ExprCondCode;
-class ExprGprEqual;
-class ExprNot;
-class ExprOr;
-class ExprPredicate;
-class ExprVar;
-
-using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
-                              ExprBoolean, ExprGprEqual>;
-using Expr = std::shared_ptr<ExprData>;
-
-class ExprAnd final {
-public:
-    explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
-    bool operator==(const ExprAnd& b) const;
-    bool operator!=(const ExprAnd& b) const;
-
-    Expr operand1;
-    Expr operand2;
-};
-
-class ExprOr final {
-public:
-    explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
-
-    bool operator==(const ExprOr& b) const;
-    bool operator!=(const ExprOr& b) const;
-
-    Expr operand1;
-    Expr operand2;
-};
-
-class ExprNot final {
-public:
-    explicit ExprNot(Expr a) : operand1{std::move(a)} {}
-
-    bool operator==(const ExprNot& b) const;
-    bool operator!=(const ExprNot& b) const;
-
-    Expr operand1;
-};
-
-class ExprVar final {
-public:
-    explicit ExprVar(u32 index) : var_index{index} {}
-
-    bool operator==(const ExprVar& b) const {
-        return var_index == b.var_index;
-    }
-
-    bool operator!=(const ExprVar& b) const {
-        return !operator==(b);
-    }
-
-    u32 var_index;
-};
-
-class ExprPredicate final {
-public:
-    explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
-
-    bool operator==(const ExprPredicate& b) const {
-        return predicate == b.predicate;
-    }
-
-    bool operator!=(const ExprPredicate& b) const {
-        return !operator==(b);
-    }
-
-    u32 predicate;
-};
-
-class ExprCondCode final {
-public:
-    explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
-
-    bool operator==(const ExprCondCode& b) const {
-        return cc == b.cc;
-    }
-
-    bool operator!=(const ExprCondCode& b) const {
-        return !operator==(b);
-    }
-
-    ConditionCode cc;
-};
-
-class ExprBoolean final {
-public:
-    explicit ExprBoolean(bool val) : value{val} {}
-
-    bool operator==(const ExprBoolean& b) const {
-        return value == b.value;
-    }
-
-    bool operator!=(const ExprBoolean& b) const {
-        return !operator==(b);
-    }
-
-    bool value;
-};
-
-class ExprGprEqual final {
-public:
-    explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
-
-    bool operator==(const ExprGprEqual& b) const {
-        return gpr == b.gpr && value == b.value;
-    }
-
-    bool operator!=(const ExprGprEqual& b) const {
-        return !operator==(b);
-    }
-
-    u32 gpr;
-    u32 value;
-};
-
-template <typename T, typename... Args>
-Expr MakeExpr(Args&&... args) {
-    static_assert(std::is_convertible_v<T, ExprData>);
-    return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
-}
-
-bool ExprAreEqual(const Expr& first, const Expr& second);
-
-bool ExprAreOpposite(const Expr& first, const Expr& second);
-
-Expr MakeExprNot(Expr first);
-
-Expr MakeExprAnd(Expr first, Expr second);
-
-Expr MakeExprOr(Expr first, Expr second);
-
-bool ExprIsTrue(const Expr& first);
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstddef>
-
-#include <boost/container_hash/hash.hpp>
-
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
-                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
-    const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
-    return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
-    static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
-
-    const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        ++offset;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
-                          const u8* host_ptr, bool is_compute) {
-    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
-    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
-    code.resize(CalculateProgramSize(code, is_compute));
-    return code;
-}
-
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b) {
-    size_t unique_identifier = boost::hash_value(code);
-    if (is_a) {
-        // VertexA programs include two programs
-        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
-    }
-    return static_cast<u64>(unique_identifier);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
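CalculateProgramSize above scans for Maxwell's self-jumping BRA terminator while skipping the scheduler control words that occupy every fourth slot from the start offset. A toy illustration of what it would report (the filler opcode and layout are made up; only the terminator constant comes from the code above):

    // Toy stream: sched words sit at offsets 0, 4, 8, ... (period 4 from the
    // start offset, which is 0 for compute) and are never inspected.
    std::vector<u64> program(10, 0x5C98078000870001ULL); // arbitrary filler opcode
    program[7] = 0xE2400FFFFF07000FULL;                  // self-jumping BRA terminator
    // CalculateProgramSize(program, /*is_compute=*/true) would return 8:
    // the loop stops at offset 7, and the terminator itself is counted.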
- -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int 
a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float 
a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, 
/// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; -class GmemNode; -class CommentNode; - -using NodeData = std::variant; -using Node = std::shared_ptr; -using Node4 = std::array; -using NodeBlock = std::vector; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant; -using TrackSampler = std::shared_ptr; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. 
- - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair indices; - std::pair offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector aoffi; - std::vector ptp; - std::vector derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector values; - u32 element{}; -}; - -/// Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant; - -class AmendNode { -public: - [[nodiscard]] std::optional GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector{}) {} - - explicit OperationNode(OperationCode 
code_, std::vector operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). 
- explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). -class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
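Consumers of node.h pattern-match on the NodeData variant behind each shared_ptr handle. A sketch of that traversal style, assuming the node classes and aliases above; the helper itself is hypothetical:

    // Counts immediate leaves in an IR tree by inspecting the NodeData variant.
    u32 CountImmediates(const Node& node) {
        if (std::holds_alternative<ImmediateNode>(*node)) {
            return 1;
        }
        if (const auto* operation = std::get_if<OperationNode>(node.get())) {
            u32 total = 0;
            for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
                total += CountImmediates((*operation)[i]);
            }
            return total;
        }
        return 0; // registers, predicates, memory accesses, ...
    }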
-
-#include <cstring>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node_helper.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-Node Conditional(Node condition, std::vector<Node> code) {
-    return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
-}
-
-Node Comment(std::string text) {
-    return MakeNode<CommentNode>(std::move(text));
-}
-
-Node Immediate(u32 value) {
-    return MakeNode<ImmediateNode>(value);
-}
-
-Node Immediate(s32 value) {
-    return Immediate(static_cast<u32>(value));
-}
-
-Node Immediate(f32 value) {
-    u32 integral;
-    std::memcpy(&integral, &value, sizeof(u32));
-    return Immediate(integral);
-}
-
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
-    if (is_signed) {
-        return operation_code;
-    }
-    switch (operation_code) {
-    case OperationCode::FCastInteger:
-        return OperationCode::FCastUInteger;
-    case OperationCode::IAdd:
-        return OperationCode::UAdd;
-    case OperationCode::IMul:
-        return OperationCode::UMul;
-    case OperationCode::IDiv:
-        return OperationCode::UDiv;
-    case OperationCode::IMin:
-        return OperationCode::UMin;
-    case OperationCode::IMax:
-        return OperationCode::UMax;
-    case OperationCode::ICastFloat:
-        return OperationCode::UCastFloat;
-    case OperationCode::ICastUnsigned:
-        return OperationCode::UCastSigned;
-    case OperationCode::ILogicalShiftLeft:
-        return OperationCode::ULogicalShiftLeft;
-    case OperationCode::ILogicalShiftRight:
-        return OperationCode::ULogicalShiftRight;
-    case OperationCode::IArithmeticShiftRight:
-        return OperationCode::UArithmeticShiftRight;
-    case OperationCode::IBitwiseAnd:
-        return OperationCode::UBitwiseAnd;
-    case OperationCode::IBitwiseOr:
-        return OperationCode::UBitwiseOr;
-    case OperationCode::IBitwiseXor:
-        return OperationCode::UBitwiseXor;
-    case OperationCode::IBitwiseNot:
-        return OperationCode::UBitwiseNot;
-    case OperationCode::IBitfieldExtract:
-        return OperationCode::UBitfieldExtract;
-    case OperationCode::IBitfieldInsert:
-        return OperationCode::UBitfieldInsert;
-    case OperationCode::IBitCount:
-        return OperationCode::UBitCount;
-    case OperationCode::LogicalILessThan:
-        return OperationCode::LogicalULessThan;
-    case OperationCode::LogicalIEqual:
-        return OperationCode::LogicalUEqual;
-    case OperationCode::LogicalILessEqual:
-        return OperationCode::LogicalULessEqual;
-    case OperationCode::LogicalIGreaterThan:
-        return OperationCode::LogicalUGreaterThan;
-    case OperationCode::LogicalINotEqual:
-        return OperationCode::LogicalUNotEqual;
-    case OperationCode::LogicalIGreaterEqual:
-        return OperationCode::LogicalUGreaterEqual;
-    case OperationCode::AtomicIExchange:
-        return OperationCode::AtomicUExchange;
-    case OperationCode::AtomicIAdd:
-        return OperationCode::AtomicUAdd;
-    case OperationCode::AtomicIMin:
-        return OperationCode::AtomicUMin;
-    case OperationCode::AtomicIMax:
-        return OperationCode::AtomicUMax;
-    case OperationCode::AtomicIAnd:
-        return OperationCode::AtomicUAnd;
-    case OperationCode::AtomicIOr:
-        return OperationCode::AtomicUOr;
-    case OperationCode::AtomicIXor:
-        return OperationCode::AtomicUXor;
-    case OperationCode::INegate:
-        UNREACHABLE_MSG("Can't negate an unsigned integer");
-        return {};
-    case OperationCode::IAbsolute:
-        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
-        return {};
-    default:
-        UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
-        return {};
-    }
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/shader/node.h"
-
-namespace VideoCommon::Shader {
-
-/// This arithmetic operation cannot be constrained
-inline constexpr MetaArithmetic PRECISE = {true};
-/// This arithmetic operation can be optimized away
-inline constexpr MetaArithmetic NO_PRECISE = {false};
-
-/// Creates a conditional node
-Node Conditional(Node condition, std::vector<Node> code);
-
-/// Creates a commentary node
-Node Comment(std::string text);
-
-/// Creates a u32 immediate
-Node Immediate(u32 value);
-
-/// Creates an s32 immediate
-Node Immediate(s32 value);
-
-/// Creates an f32 immediate
-Node Immediate(f32 value);
-
-/// Converts a signed operation code to an unsigned operation code
-OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
-
-template <typename T, typename... Args>
-Node MakeNode(Args&&... args) {
-    static_assert(std::is_convertible_v<T, NodeData>);
-    return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
-}
-
-template <typename T, typename... Args>
-TrackSampler MakeTrackSampler(Args&&... args) {
-    static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
-}
-
-template <typename... Args>
-Node Operation(OperationCode code, Args&&... args) {
-    if constexpr (sizeof...(args) == 0) {
-        return MakeNode<OperationNode>(code);
-    } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
-                                               Meta>) {
-        return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
-    } else {
-        return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
-    }
-}
-
-template <typename... Args>
-Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
-    return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
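Before moving on to the registry, a minimal sketch of how the node_helper factories above were composed in practice. ClampToUnit is a hypothetical wrapper written for illustration; Operation, Immediate, NO_PRECISE and OperationCode::FClamp all come from the deleted header.

#include <utility>

#include "video_core/shader/node_helper.h"

namespace VideoCommon::Shader {

// Builds FClamp(value, 0.0f, 1.0f); NO_PRECISE marks the operation as
// freely optimizable by the backend.
Node ClampToUnit(Node value) {
    return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), Immediate(0.0f),
                     Immediate(1.0f));
}

} // namespace VideoCommon::Shader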
- -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional Registry::ObtainSeparateSampler( - std::pair buffers, std::pair offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - -std::optional 
Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
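The Obtain* accessors above all share a single caching shape. The following standalone sketch, with simplified types and a hypothetical name, captures it outside the class:

#include <cstdint>
#include <functional>
#include <optional>
#include <unordered_map>

// Consult the cache first; fall back to the engine and memoize on a miss.
std::optional<std::uint32_t> ObtainCached(
    std::unordered_map<std::uint32_t, std::uint32_t>& cache, std::uint32_t key,
    const std::function<std::uint32_t(std::uint32_t)>& query_engine) {
    if (const auto it = cache.find(key); it != cache.end()) {
        return it->second; // disk-shader replay always takes this path
    }
    if (!query_engine) {
        return std::nullopt; // serialized registry without a live engine
    }
    const std::uint32_t value = query_engine(key);
    cache.emplace(key, value); // remembered for later serialization
    return value;
}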
- -#pragma once - -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair buffers; - std::pair offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using SeparateSamplerMap = - std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array - tfb_layouts{}; - std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v && - std::is_standard_layout_v); - -struct ComputeInfo { - std::array workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainSeparateSampler( - std::pair buffers, std::pair offsets); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. 
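    /// A registry restored from disk with no live engine attached is treated
    /// as trivially consistent.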
- bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
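Summing up the interface that just ended: a registry is built either against a live engine, which answers and records cache misses, or from serialized data, where only previously recorded values can be observed. A sketch with hypothetical factory names; the vertex stage is an arbitrary choice:

#include "video_core/shader/registry.h"

namespace {

using VideoCommon::Shader::Registry;
using VideoCommon::Shader::SerializedRegistryInfo;

// Live mode: Obtain* queries are forwarded to the engine and memoized.
Registry MakeLiveRegistry(Tegra::Engines::ConstBufferEngineInterface& engine) {
    return Registry{Tegra::Engines::ShaderType::Vertex, engine};
}

// Offline mode: Obtain* returns nullopt for anything not recorded earlier.
Registry MakeOfflineRegistry(const SerializedRegistryInfo& info) {
    return Registry{Tegra::Engines::ShaderType::Vertex, info};
}

} // Anonymous namespace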
- -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast(reg)); - } - return MakeNode(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. - if (const auto gpr = std::get_if(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast(immediate ? 
Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node 
ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { 
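    // Like the integer variant above, the mapping is a constexpr table of
    // {PredCondition, OperationCode} pairs scanned with std::find_if, keeping
    // the condition-to-opcode translation declarative and cheap.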
- static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = 
Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
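For reference, what the UBitfieldExtract node produced by BitfieldExtract ultimately computes, written as a plain C++ equivalent. The helper name is hypothetical and the semantics are the zero-extending ones used here:

#include <cstdint>

// value >> offset, keeping the low 'bits' bits; assumes 0 < bits < 32.
constexpr std::uint32_t ExtractBitfield(std::uint32_t value, std::uint32_t offset,
                                        std::uint32_t bits) {
    return (value >> offset) & ((std::uint32_t{1} << bits) - 1);
}
static_assert(ExtractBitfield(0xDEADBEEF, 8, 8) == 0xBE);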
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set& GetRegisters() const { - return used_registers; - } - - const std::set& GetPredicates() const { - return used_predicates; - } - - const std::set& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list& GetSamplers() const { - return used_samplers; - } - - const std::list& GetImages() const { - return used_images; - } - - const std::array& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node 
GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional type; - std::optional is_array; - std::optional is_shadow; - std::optional is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. - Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. 
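    /// Marks the shader as using physical attributes as a side effect.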
- Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. 
meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional sampler); - - /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. - void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. - void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector GetPtpCoordinates(std::array ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, - std::optional bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, 
bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. - std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector amend_code; - u32 num_custom_variables{}; - - std::set used_registers; - std::set used_predicates; - std::set used_input_attributes; - std::set used_output_attributes; - std::map used_cbufs; - std::list used_samplers; - std::list used_images; - std::array used_clip_distances{}; - std::map used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
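A sketch of how a backend walks the interface above once decoding finishes. LowerToBackend is a hypothetical driver function, setup of its arguments is elided, and the loop bodies are placeholders:

#include "video_core/shader/shader_ir.h"

namespace VideoCommon::Shader {

void LowerToBackend(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
                    Registry& registry) {
    const ShaderIR ir{program_code, main_offset, settings, registry};
    for (const auto& [address, block] : ir.GetBasicBlocks()) {
        // Lower each NodeBlock into the target shading language here
    }
    for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
        // Declare a uniform buffer of cbuf.GetSize() bytes at binding 'index'
    }
}

} // namespace VideoCommon::Shader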
- -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if(&*node)) { - conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - auto track = MakeTrackSampler(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler(std::pair{index_a, index_b}, - std::pair{offset_a, 
offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast(conditional_code.size())); - } - return {}; -} - -std::pair ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. 
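    // Operands are walked from last to first; the first chain that reaches a
    // constant buffer wins, mirroring the loop in TrackBindlessSampler above.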
- return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast(conditional_code.size())); - } - return {}; -} - -std::optional ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = 
layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: [PATCH 024/770] shader: Primitive Vulkan integration --- src/shader_recompiler/CMakeLists.txt | 13 +- .../backend/spirv/emit_context.cpp | 2 + .../backend/spirv/emit_spirv.cpp | 119 +- .../backend/spirv/emit_spirv.h | 419 ++- .../spirv/emit_spirv_bitwise_conversion.cpp | 24 +- .../backend/spirv/emit_spirv_composite.cpp | 48 +- .../spirv/emit_spirv_context_get_set.cpp | 42 +- .../backend/spirv/emit_spirv_control_flow.cpp | 10 +- .../spirv/emit_spirv_floating_point.cpp | 92 +- .../backend/spirv/emit_spirv_integer.cpp | 60 +- .../backend/spirv/emit_spirv_logical.cpp | 40 +- .../backend/spirv/emit_spirv_memory.cpp | 56 +- .../backend/spirv/emit_spirv_select.cpp | 8 +- .../backend/spirv/emit_spirv_undefined.cpp | 10 +- src/shader_recompiler/environment.h | 6 +- src/shader_recompiler/file_environment.cpp | 6 +- src/shader_recompiler/file_environment.h | 4 +- .../frontend/ir/basic_block.cpp | 2 + .../frontend/ir/post_order.cpp | 2 +- .../frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 + .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 33 +- .../translate/impl/not_implemented.cpp | 4 - src/shader_recompiler/main.cpp | 2 +- src/shader_recompiler/profile.h | 13 + src/shader_recompiler/recompiler.cpp | 27 + src/shader_recompiler/recompiler.h | 18 + src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 ----------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- 
 .../renderer_vulkan/vk_descriptor_pool.cpp    |   6 +-
 .../renderer_vulkan/vk_descriptor_pool.h      |  10 +-
 src/video_core/renderer_vulkan/vk_pipeline.h  |  36 +
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 190 +-
 .../renderer_vulkan/vk_pipeline_cache.h       |  30 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |  23 +-
 .../renderer_vulkan/vk_rasterizer.h           |   3 -
 .../renderer_vulkan/vk_resource_pool.cpp      |  12 +-
 .../renderer_vulkan/vk_resource_pool.h        |  12 +-
 43 files changed, 1003 insertions(+), 3036 deletions(-)
 create mode 100644 src/shader_recompiler/profile.h
 create mode 100644 src/shader_recompiler/recompiler.cpp
 create mode 100644 src/shader_recompiler/recompiler.h
 delete mode 100644 src/video_core/engines/shader_bytecode.h
 delete mode 100644 src/video_core/engines/shader_header.h
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 84be94a8d..b56bdd3d9 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_executable(shader_recompiler
+add_library(shader_recompiler STATIC
     backend/spirv/emit_context.cpp
     backend/spirv/emit_context.h
     backend/spirv/emit_spirv.cpp
@@ -85,13 +85,19 @@ add_executable(shader_recompiler
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/verification_pass.cpp
-    main.cpp
     object_pool.h
+    profile.h
+    recompiler.cpp
+    recompiler.h
     shader_info.h
 )
 
-target_include_directories(video_core PRIVATE sirit)
 target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit)
+target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit)
+
+add_executable(shader_util main.cpp)
+target_link_libraries(shader_util PRIVATE shader_recompiler)
 
 if (MSVC)
     target_compile_options(shader_recompiler PRIVATE
@@ -121,3 +127,4 @@ else()
 endif()
 
 create_target_directory_groups(shader_recompiler)
+create_target_directory_groups(shader_util)
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 1c985aff8..770067d98 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) {
     for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
         Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("c{}", desc.index));
         std::fill_n(cbufs.data() + desc.index, desc.count, id);
         binding += desc.count;
@@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) {
     for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
         Decorate(id, spv::Decoration::Binding, binding);
+        Decorate(id, spv::Decoration::DescriptorSet, 0U);
         Name(id, fmt::format("ssbo{}", binding));
         std::fill_n(ssbos.data() + binding, desc.count, id);
         binding += desc.count;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 55018332e..d59718435 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -2,8 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
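// The hunks that follow replace the EmitSPIRV class, whose emitters were member
// functions invoked through pointer-to-member dispatch, with free functions
// selected at compile time from the opcode table. A minimal sketch of the
// FuncTraits/Invoke pattern the new code moves to; EmitExample and these exact
// definitions are illustrative assumptions, not the yuzu sources:
#include <cstddef>
#include <cstdio>
#include <tuple>
#include <type_traits>
#include <utility>

template <class Func>
struct FuncTraits;

// Function-pointer specialization: exposes the return type, arity, and each
// argument type so a generic dispatcher can decode IR operands to C++ types.
template <class ReturnType_, class... Args>
struct FuncTraits<ReturnType_ (*)(Args...)> {
    using ReturnType = ReturnType_;
    static constexpr std::size_t NUM_ARGS = sizeof...(Args);
    template <std::size_t I>
    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};

int EmitExample(int ctx, float operand) {
    return ctx + static_cast<int>(operand);
}

// The emitter is a non-type template parameter, so every opcode case
// instantiates a direct call instead of an indirect member-pointer call.
template <auto func, typename... Args>
typename FuncTraits<decltype(func)>::ReturnType Invoke(Args&&... args) {
    static_assert(FuncTraits<decltype(func)>::NUM_ARGS == sizeof...(Args));
    return func(std::forward<Args>(args)...);
}

int main() {
    std::printf("%d\n", Invoke<&EmitExample>(1, 2.5f)); // prints 3
}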
-#include +#include +#include #include +#include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -14,10 +17,10 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits : FuncTraits {}; -template -struct FuncTraits { +template +struct FuncTraits { using ReturnType = ReturnType_; static constexpr size_t NUM_ARGS = sizeof...(Args); @@ -26,15 +29,15 @@ struct FuncTraits { using ArgType = std::tuple_element_t>; }; -template -void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; if (has_forward_id) { current_id = ctx.ExchangeCurrentId(forward_id); } - const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + const Id new_id{func(ctx, std::forward(args)...)}; if (has_forward_id) { ctx.ExchangeCurrentId(current_id); } else { @@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(emit, ctx, inst, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(emit, ctx, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { - Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + Invoke(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; static constexpr bool is_first_arg_inst = std::is_same_v; using Indices = std::make_index_sequence; - Invoke(emit, ctx, inst, Indices{}); + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) 
\ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); } } } // Anonymous namespace -EmitSPIRV::EmitSPIRV(IR::Program& program) { +std::vector EmitSPIRV(Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { if (program.info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); - std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis D:\\shader.spv") == 0 && - std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && - std::system("spirv-cross -V D:\\shader.spv") == 0; + const std::array workgroup_size{env.WorkgroupSize()}; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], + workgroup_size[2]); + + return ctx.Assemble(); } -void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { -#define OPCODE(name, result_type, ...) 
\ - case IR::Opcode::name: \ - return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE - } - throw LogicError("Invalid opcode {}", inst->Opcode()); -} - -static Id TypeId(const EmitContext& ctx, IR::Type type) { - switch (type) { - case IR::Type::U1: - return ctx.U1; - case IR::Type::U32: - return ctx.U32[1]; - default: - throw NotImplementedException("Phi node type {}", type); - } -} - -Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; boost::container::small_vector operands; operands.reserve(num_args * 2); @@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } -void EmitSPIRV::EmitVoid(EmitContext&) {} +void EmitVoid(EmitContext&) {} -Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return ctx.Def(value); } -void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { +void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { +void EmitGetSignFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { +void EmitGetCarryFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { +void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8bde82613..5813f51ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,223 +8,218 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -class EmitSPIRV { -public: - explicit EmitSPIRV(IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); -private: - void EmitInst(EmitContext& ctx, IR::Inst* inst); - - // Microinstruction emitters - Id EmitPhi(EmitContext& ctx, IR::Inst* inst); - void EmitVoid(EmitContext& ctx); - Id EmitIdentity(EmitContext& ctx, const IR::Value& value); - void EmitBranch(EmitContext& ctx, IR::Block* label); - void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); - void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); - void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); - void EmitReturn(EmitContext& ctx); - void EmitGetRegister(EmitContext& ctx); - void EmitSetRegister(EmitContext& ctx); - void EmitGetPred(EmitContext& ctx); - void EmitSetPred(EmitContext& ctx); - void EmitSetGotoVariable(EmitContext& ctx); - void EmitGetGotoVariable(EmitContext& ctx); - Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitGetAttribute(EmitContext& ctx); - void EmitSetAttribute(EmitContext& ctx); - void EmitGetAttributeIndexed(EmitContext& ctx); - void EmitSetAttributeIndexed(EmitContext& ctx); - void EmitGetZFlag(EmitContext& ctx); - void EmitGetSFlag(EmitContext& ctx); - void 
EmitGetCFlag(EmitContext& ctx); - void EmitGetOFlag(EmitContext& ctx); - void EmitSetZFlag(EmitContext& ctx); - void EmitSetSFlag(EmitContext& ctx); - void EmitSetCFlag(EmitContext& ctx); - void EmitSetOFlag(EmitContext& ctx); - Id EmitWorkgroupId(EmitContext& ctx); - Id EmitLocalInvocationId(EmitContext& ctx); - Id EmitUndefU1(EmitContext& ctx); - Id EmitUndefU8(EmitContext& ctx); - Id EmitUndefU16(EmitContext& ctx); - Id EmitUndefU32(EmitContext& ctx); - Id EmitUndefU64(EmitContext& ctx); - void EmitLoadGlobalU8(EmitContext& ctx); - void EmitLoadGlobalS8(EmitContext& ctx); - void EmitLoadGlobalU16(EmitContext& ctx); - void EmitLoadGlobalS16(EmitContext& ctx); - void EmitLoadGlobal32(EmitContext& ctx); - void EmitLoadGlobal64(EmitContext& ctx); - void EmitLoadGlobal128(EmitContext& ctx); - void EmitWriteGlobalU8(EmitContext& ctx); - void EmitWriteGlobalS8(EmitContext& ctx); - void EmitWriteGlobalU16(EmitContext& ctx); - void EmitWriteGlobalS16(EmitContext& ctx); - void EmitWriteGlobal32(EmitContext& ctx); - void EmitWriteGlobal64(EmitContext& ctx); - void EmitWriteGlobal128(EmitContext& ctx); - void EmitLoadStorageU8(EmitContext& ctx); - void EmitLoadStorageS8(EmitContext& ctx); - void EmitLoadStorageU16(EmitContext& ctx); - void EmitLoadStorageS16(EmitContext& ctx); - Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitLoadStorage64(EmitContext& ctx); - void EmitLoadStorage128(EmitContext& ctx); - void EmitWriteStorageU8(EmitContext& ctx); - void EmitWriteStorageS8(EmitContext& ctx); - void EmitWriteStorageU16(EmitContext& ctx); - void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); - void EmitWriteStorage64(EmitContext& ctx); - void EmitWriteStorage128(EmitContext& ctx); - void EmitCompositeConstructU32x2(EmitContext& ctx); - void EmitCompositeConstructU32x3(EmitContext& ctx); - void EmitCompositeConstructU32x4(EmitContext& ctx); - void EmitCompositeExtractU32x2(EmitContext& ctx); - Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); - void EmitCompositeExtractU32x4(EmitContext& ctx); - void EmitCompositeConstructF16x2(EmitContext& ctx); - void EmitCompositeConstructF16x3(EmitContext& ctx); - void EmitCompositeConstructF16x4(EmitContext& ctx); - void EmitCompositeExtractF16x2(EmitContext& ctx); - void EmitCompositeExtractF16x3(EmitContext& ctx); - void EmitCompositeExtractF16x4(EmitContext& ctx); - void EmitCompositeConstructF32x2(EmitContext& ctx); - void EmitCompositeConstructF32x3(EmitContext& ctx); - void EmitCompositeConstructF32x4(EmitContext& ctx); - void EmitCompositeExtractF32x2(EmitContext& ctx); - void EmitCompositeExtractF32x3(EmitContext& ctx); - void EmitCompositeExtractF32x4(EmitContext& ctx); - void EmitCompositeConstructF64x2(EmitContext& ctx); - void EmitCompositeConstructF64x3(EmitContext& ctx); - void EmitCompositeConstructF64x4(EmitContext& ctx); - void EmitCompositeExtractF64x2(EmitContext& ctx); - void EmitCompositeExtractF64x3(EmitContext& ctx); - void EmitCompositeExtractF64x4(EmitContext& ctx); - void EmitSelect8(EmitContext& ctx); - void EmitSelect16(EmitContext& ctx); - void EmitSelect32(EmitContext& ctx); - void EmitSelect64(EmitContext& ctx); - void EmitBitCastU16F16(EmitContext& ctx); - Id EmitBitCastU32F32(EmitContext& ctx, Id value); - void EmitBitCastU64F64(EmitContext& ctx); - void EmitBitCastF16U16(EmitContext& ctx); - Id EmitBitCastF32U32(EmitContext& ctx, Id value); - void 
EmitBitCastF64U64(EmitContext& ctx); - void EmitPackUint2x32(EmitContext& ctx); - void EmitUnpackUint2x32(EmitContext& ctx); - void EmitPackFloat2x16(EmitContext& ctx); - void EmitUnpackFloat2x16(EmitContext& ctx); - void EmitPackDouble2x32(EmitContext& ctx); - void EmitUnpackDouble2x32(EmitContext& ctx); - void EmitGetZeroFromOp(EmitContext& ctx); - void EmitGetSignFromOp(EmitContext& ctx); - void EmitGetCarryFromOp(EmitContext& ctx); - void EmitGetOverflowFromOp(EmitContext& ctx); - void EmitFPAbs16(EmitContext& ctx); - void EmitFPAbs32(EmitContext& ctx); - void EmitFPAbs64(EmitContext& ctx); - Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - void EmitFPMax32(EmitContext& ctx); - void EmitFPMax64(EmitContext& ctx); - void EmitFPMin32(EmitContext& ctx); - void EmitFPMin64(EmitContext& ctx); - Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitFPNeg16(EmitContext& ctx); - void EmitFPNeg32(EmitContext& ctx); - void EmitFPNeg64(EmitContext& ctx); - void EmitFPRecip32(EmitContext& ctx); - void EmitFPRecip64(EmitContext& ctx); - void EmitFPRecipSqrt32(EmitContext& ctx); - void EmitFPRecipSqrt64(EmitContext& ctx); - void EmitFPSqrt(EmitContext& ctx); - void EmitFPSin(EmitContext& ctx); - void EmitFPSinNotReduced(EmitContext& ctx); - void EmitFPExp2(EmitContext& ctx); - void EmitFPExp2NotReduced(EmitContext& ctx); - void EmitFPCos(EmitContext& ctx); - void EmitFPCosNotReduced(EmitContext& ctx); - void EmitFPLog2(EmitContext& ctx); - void EmitFPSaturate16(EmitContext& ctx); - void EmitFPSaturate32(EmitContext& ctx); - void EmitFPSaturate64(EmitContext& ctx); - void EmitFPRoundEven16(EmitContext& ctx); - void EmitFPRoundEven32(EmitContext& ctx); - void EmitFPRoundEven64(EmitContext& ctx); - void EmitFPFloor16(EmitContext& ctx); - void EmitFPFloor32(EmitContext& ctx); - void EmitFPFloor64(EmitContext& ctx); - void EmitFPCeil16(EmitContext& ctx); - void EmitFPCeil32(EmitContext& ctx); - void EmitFPCeil64(EmitContext& ctx); - void EmitFPTrunc16(EmitContext& ctx); - void EmitFPTrunc32(EmitContext& ctx); - void EmitFPTrunc64(EmitContext& ctx); - Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitIAdd64(EmitContext& ctx); - Id EmitISub32(EmitContext& ctx, Id a, Id b); - void EmitISub64(EmitContext& ctx); - Id EmitIMul32(EmitContext& ctx, Id a, Id b); - void EmitINeg32(EmitContext& ctx); - void EmitIAbs32(EmitContext& ctx); - Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); - void EmitShiftRightLogical32(EmitContext& ctx); - void EmitShiftRightArithmetic32(EmitContext& ctx); - void EmitBitwiseAnd32(EmitContext& ctx); - void EmitBitwiseOr32(EmitContext& ctx); - void EmitBitwiseXor32(EmitContext& ctx); - void EmitBitFieldInsert(EmitContext& ctx); - void EmitBitFieldSExtract(EmitContext& ctx); - Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitULessThan(EmitContext& ctx); - void EmitIEqual(EmitContext& ctx); - void EmitSLessThanEqual(EmitContext& ctx); - void EmitULessThanEqual(EmitContext& 
ctx); - Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitUGreaterThan(EmitContext& ctx); - void EmitINotEqual(EmitContext& ctx); - void EmitSGreaterThanEqual(EmitContext& ctx); - Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); - void EmitLogicalOr(EmitContext& ctx); - void EmitLogicalAnd(EmitContext& ctx); - void EmitLogicalXor(EmitContext& ctx); - void EmitLogicalNot(EmitContext& ctx); - void EmitConvertS16F16(EmitContext& ctx); - void EmitConvertS16F32(EmitContext& ctx); - void EmitConvertS16F64(EmitContext& ctx); - void EmitConvertS32F16(EmitContext& ctx); - void EmitConvertS32F32(EmitContext& ctx); - void EmitConvertS32F64(EmitContext& ctx); - void EmitConvertS64F16(EmitContext& ctx); - void EmitConvertS64F32(EmitContext& ctx); - void EmitConvertS64F64(EmitContext& ctx); - void EmitConvertU16F16(EmitContext& ctx); - void EmitConvertU16F32(EmitContext& ctx); - void EmitConvertU16F64(EmitContext& ctx); - void EmitConvertU32F16(EmitContext& ctx); - void EmitConvertU32F32(EmitContext& ctx); - void EmitConvertU32F64(EmitContext& ctx); - void EmitConvertU64F16(EmitContext& ctx); - void EmitConvertU64F32(EmitContext& ctx); - void EmitConvertU64F64(EmitContext& ctx); - void EmitConvertU64U32(EmitContext& ctx); - void EmitConvertU32U64(EmitContext& ctx); -}; +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, IR::Block* label); +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitReturn(EmitContext& ctx); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetAttributeIndexed(EmitContext& ctx); +void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx); +void EmitLoadGlobal64(EmitContext& ctx); +void EmitLoadGlobal128(EmitContext& ctx); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx); +void EmitWriteGlobal64(EmitContext& ctx); +void EmitWriteGlobal128(EmitContext& ctx); +void EmitLoadStorageU8(EmitContext& 
ctx); +void EmitLoadStorageS8(EmitContext& ctx); +void EmitLoadStorageU16(EmitContext& ctx); +void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx); +void EmitLoadStorage128(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx); +void EmitWriteStorageS8(EmitContext& ctx); +void EmitWriteStorageU16(EmitContext& ctx); +void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx); +void EmitCompositeConstructU32x2(EmitContext& ctx); +void EmitCompositeConstructU32x3(EmitContext& ctx); +void EmitCompositeConstructU32x4(EmitContext& ctx); +void EmitCompositeExtractU32x2(EmitContext& ctx); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx); +void EmitCompositeConstructF16x2(EmitContext& ctx); +void EmitCompositeConstructF16x3(EmitContext& ctx); +void EmitCompositeConstructF16x4(EmitContext& ctx); +void EmitCompositeExtractF16x2(EmitContext& ctx); +void EmitCompositeExtractF16x3(EmitContext& ctx); +void EmitCompositeExtractF16x4(EmitContext& ctx); +void EmitCompositeConstructF32x2(EmitContext& ctx); +void EmitCompositeConstructF32x3(EmitContext& ctx); +void EmitCompositeConstructF32x4(EmitContext& ctx); +void EmitCompositeExtractF32x2(EmitContext& ctx); +void EmitCompositeExtractF32x3(EmitContext& ctx); +void EmitCompositeExtractF32x4(EmitContext& ctx); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitSelect8(EmitContext& ctx); +void EmitSelect16(EmitContext& ctx); +void EmitSelect32(EmitContext& ctx); +void EmitSelect64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx); +void EmitUnpackUint2x32(EmitContext& ctx); +void EmitPackFloat2x16(EmitContext& ctx); +void EmitUnpackFloat2x16(EmitContext& ctx); +void EmitPackDouble2x32(EmitContext& ctx); +void EmitUnpackDouble2x32(EmitContext& ctx); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx); +void EmitFPAbs32(EmitContext& ctx); +void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +void EmitFPMax32(EmitContext& ctx); +void EmitFPMax64(EmitContext& ctx); +void EmitFPMin32(EmitContext& ctx); +void EmitFPMin64(EmitContext& ctx); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id 
EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitFPNeg16(EmitContext& ctx); +void EmitFPNeg32(EmitContext& ctx); +void EmitFPNeg64(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx); +void EmitFPRecip64(EmitContext& ctx); +void EmitFPRecipSqrt32(EmitContext& ctx); +void EmitFPRecipSqrt64(EmitContext& ctx); +void EmitFPSqrt(EmitContext& ctx); +void EmitFPSin(EmitContext& ctx); +void EmitFPSinNotReduced(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPExp2NotReduced(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPCosNotReduced(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPRoundEven16(EmitContext& ctx); +void EmitFPRoundEven32(EmitContext& ctx); +void EmitFPRoundEven64(EmitContext& ctx); +void EmitFPFloor16(EmitContext& ctx); +void EmitFPFloor32(EmitContext& ctx); +void EmitFPFloor64(EmitContext& ctx); +void EmitFPCeil16(EmitContext& ctx); +void EmitFPCeil32(EmitContext& ctx); +void EmitFPCeil64(EmitContext& ctx); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitIAdd64(EmitContext& ctx); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +void EmitISub64(EmitContext& ctx); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +void EmitINeg32(EmitContext& ctx); +void EmitIAbs32(EmitContext& ctx); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +void EmitShiftRightLogical32(EmitContext& ctx); +void EmitShiftRightArithmetic32(EmitContext& ctx); +void EmitBitwiseAnd32(EmitContext& ctx); +void EmitBitwiseOr32(EmitContext& ctx); +void EmitBitwiseXor32(EmitContext& ctx); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitULessThan(EmitContext& ctx); +void EmitIEqual(EmitContext& ctx); +void EmitSLessThanEqual(EmitContext& ctx); +void EmitULessThanEqual(EmitContext& ctx); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitUGreaterThan(EmitContext& ctx); +void EmitINotEqual(EmitContext& ctx); +void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +void EmitLogicalOr(EmitContext& ctx); +void EmitLogicalAnd(EmitContext& ctx); +void EmitLogicalXor(EmitContext& ctx); +void EmitLogicalNot(EmitContext& ctx); +void EmitConvertS16F16(EmitContext& ctx); +void EmitConvertS16F32(EmitContext& ctx); +void EmitConvertS16F64(EmitContext& ctx); +void EmitConvertS32F16(EmitContext& ctx); +void EmitConvertS32F32(EmitContext& ctx); +void EmitConvertS32F64(EmitContext& ctx); +void EmitConvertS64F16(EmitContext& ctx); +void EmitConvertS64F32(EmitContext& ctx); +void EmitConvertS64F64(EmitContext& ctx); +void EmitConvertU16F16(EmitContext& ctx); +void EmitConvertU16F32(EmitContext& ctx); +void EmitConvertU16F64(EmitContext& ctx); +void EmitConvertU32F16(EmitContext& ctx); +void EmitConvertU32F32(EmitContext& ctx); +void EmitConvertU32F64(EmitContext& ctx); +void EmitConvertU64F16(EmitContext& ctx); +void EmitConvertU64F32(EmitContext& ctx); +void EmitConvertU64F64(EmitContext& ctx); +void EmitConvertU64U32(EmitContext& ctx); +void EmitConvertU32U64(EmitContext& ctx); } // 
namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index af82df99c..49c200498 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,51 +6,51 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { +void EmitBitCastU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { +void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { +void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { +void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackUint2x32(EmitContext&) { +void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { +void EmitUnpackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { +void EmitPackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { +void EmitUnpackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { +void EmitPackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { +void EmitUnpackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index a7374c89d..348e4796d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,99 +6,99 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { +void EmitCompositeConstructU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { +void EmitCompositeConstructU32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { +void EmitCompositeConstructU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { +void EmitCompositeExtractU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } -void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { +void EmitCompositeExtractU32x4(EmitContext&) { 
throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { +void EmitCompositeConstructF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { +void EmitCompositeConstructF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { +void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { +void EmitCompositeExtractF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { +void EmitCompositeExtractF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { +void EmitCompositeExtractF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { +void EmitCompositeConstructF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { +void EmitCompositeConstructF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { +void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { +void EmitCompositeExtractF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { +void EmitCompositeExtractF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { +void EmitCompositeExtractF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { +void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { +void EmitCompositeConstructF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { +void EmitCompositeConstructF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { +void EmitCompositeExtractF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { +void EmitCompositeExtractF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { +void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f4c9970eb..eb9c01c5a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,31 +6,31 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitGetRegister(EmitContext&) { +void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void 
EmitSPIRV::EmitSetRegister(EmitContext&) { +void EmitSetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetPred(EmitContext&) { +void EmitGetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetPred(EmitContext&) { +void EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { +void EmitSetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { +void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: return ctx.OpLoad(ctx.U32[1], access_chain); } -void EmitSPIRV::EmitGetAttribute(EmitContext&) { +void EmitGetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttribute(EmitContext&) { +void EmitSetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { +void EmitGetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { +void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetZFlag(EmitContext&) { +void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetSFlag(EmitContext&) { +void EmitGetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetCFlag(EmitContext&) { +void EmitGetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetOFlag(EmitContext&) { +void EmitGetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetZFlag(EmitContext&) { +void EmitSetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetSFlag(EmitContext&) { +void EmitSetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetCFlag(EmitContext&) { +void EmitSetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetOFlag(EmitContext&) { +void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { +Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } -Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { +Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 549c1907a..6c4199664 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,25 +6,25 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* 
label) { +void EmitBranch(EmitContext& ctx, IR::Block* label) { ctx.OpBranch(label->Definition()); } -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), spv::LoopControlMask::MaskNone); } -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); } -void EmitSPIRV::EmitReturn(EmitContext& ctx) { +void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9bc121f8..d24fbb353 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { } // Anonymous namespace -void EmitSPIRV::EmitFPAbs16(EmitContext&) { +void EmitFPAbs16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs32(EmitContext&) { +void EmitFPAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs64(EmitContext&) { +void EmitFPAbs64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } -Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitSPIRV::EmitFPMax32(EmitContext&) { +void EmitFPMax32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMax64(EmitContext&) { +void EmitFPMax64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin32(EmitContext&) { +void EmitFPMin32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin64(EmitContext&) { +void 
EmitFPMin64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitSPIRV::EmitFPNeg16(EmitContext&) { +void EmitFPNeg16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg32(EmitContext&) { +void EmitFPNeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg64(EmitContext&) { +void EmitFPNeg64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip32(EmitContext&) { +void EmitFPRecip32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip64(EmitContext&) { +void EmitFPRecip64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { +void EmitFPRecipSqrt32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { +void EmitFPRecipSqrt64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSqrt(EmitContext&) { +void EmitFPSqrt(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSin(EmitContext&) { +void EmitFPSin(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { +void EmitFPSinNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2(EmitContext&) { +void EmitFPExp2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { +void EmitFPExp2NotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCos(EmitContext&) { +void EmitFPCos(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { +void EmitFPCosNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPLog2(EmitContext&) { +void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate16(EmitContext&) { +void EmitFPSaturate16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate32(EmitContext&) { +void EmitFPSaturate32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate64(EmitContext&) { +void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { +void EmitFPRoundEven16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { +void EmitFPRoundEven32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { +void 
EmitFPRoundEven64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor16(EmitContext&) { +void EmitFPFloor16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor32(EmitContext&) { +void EmitFPFloor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor64(EmitContext&) { +void EmitFPFloor64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil16(EmitContext&) { +void EmitFPCeil16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil32(EmitContext&) { +void EmitFPCeil32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil64(EmitContext&) { +void EmitFPCeil64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc16(EmitContext&) { +void EmitFPTrunc16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc32(EmitContext&) { +void EmitFPTrunc32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc64(EmitContext&) { +void EmitFPTrunc64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 32af94a73..a1d16b81e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -6,126 +6,126 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } return ctx.OpIAdd(ctx.U32[1], a, b); } -void EmitSPIRV::EmitIAdd64(EmitContext&) { +void EmitIAdd64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { +Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitSPIRV::EmitISub64(EmitContext&) { +void EmitISub64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitSPIRV::EmitINeg32(EmitContext&) { +void EmitINeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIAbs32(EmitContext&) { +void EmitIAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { +void EmitShiftRightLogical32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { +void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { +void EmitBitwiseAnd32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { +void EmitBitwiseOr32(EmitContext&) { throw 
NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { +void EmitBitwiseXor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { +void EmitBitFieldInsert(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { +void EmitBitFieldSExtract(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } -Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitULessThan(EmitContext&) { +void EmitULessThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIEqual(EmitContext&) { +void EmitIEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { +void EmitSLessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitULessThanEqual(EmitContext&) { +void EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitUGreaterThan(EmitContext&) { +void EmitUGreaterThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitINotEqual(EmitContext&) { +void EmitINotEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { +void EmitSGreaterThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitLogicalOr(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalAnd(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalXor(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalNot(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index 7b43c4ed8..ff2f4fb74 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,83 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitConvertS16F16(EmitContext&) { +void EmitConvertS16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F32(EmitContext&) { +void EmitConvertS16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F64(EmitContext&) { +void EmitConvertS16F64(EmitContext&) { throw 
NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F16(EmitContext&) { +void EmitConvertS32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F32(EmitContext&) { +void EmitConvertS32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F64(EmitContext&) { +void EmitConvertS32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F16(EmitContext&) { +void EmitConvertS64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F32(EmitContext&) { +void EmitConvertS64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F64(EmitContext&) { +void EmitConvertS64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F16(EmitContext&) { +void EmitConvertU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F32(EmitContext&) { +void EmitConvertU16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F64(EmitContext&) { +void EmitConvertU16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F16(EmitContext&) { +void EmitConvertU32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F32(EmitContext&) { +void EmitConvertU32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F64(EmitContext&) { +void EmitConvertU32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F16(EmitContext&) { +void EmitConvertU64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F32(EmitContext&) { +void EmitConvertU64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F64(EmitContext&) { +void EmitConvertU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64U32(EmitContext&) { +void EmitConvertU64U32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32U64(EmitContext&) { +void EmitConvertU32U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 5769a3c95..77d698ffd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { +void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { +void EmitLoadGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { +void EmitLoadGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { +void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V 
Instruction"); } -void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { +void EmitLoadGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { +void EmitLoadGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { +void EmitLoadGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { +void EmitWriteGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { +void EmitWriteGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { +void EmitWriteGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { +void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { +void EmitWriteGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { +void EmitWriteGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { +void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { +void EmitLoadStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { +void EmitLoadStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { +void EmitLoadStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { +void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, return ctx.OpLoad(ctx.U32[1], pointer); } -void EmitSPIRV::EmitLoadStorage64(EmitContext&) { +void EmitLoadStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorage128(EmitContext&) { +void EmitLoadStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { +void EmitWriteStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { +void EmitWriteStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { +void EmitWriteStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { +void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { if 
(!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitSPIRV::EmitWriteStorage64(EmitContext&) { +void EmitWriteStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage128(EmitContext&) { +void EmitWriteStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 40a856f72..8d5062724 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,19 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitSelect8(EmitContext&) { +void EmitSelect8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect16(EmitContext&) { +void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect32(EmitContext&) { +void EmitSelect32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect64(EmitContext&) { +void EmitSelect64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index c1ed8f281..19b06dbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { +Id EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.U1); } -Id EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { +Id EmitUndefU32(EmitContext& ctx) { return ctx.OpUndef(ctx.U32[1]); } -Id EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index f6230e817..0ba681fb9 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -1,5 +1,7 @@ #pragma once +#include <array> + #include "common/common_types.h" namespace Shader { @@ -8,7 +10,9 @@ class Environment { public: virtual ~Environment() = default; - [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0; + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index b34bf462b..5127523f9 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) { FileEnvironment::~FileEnvironment() = default; -u64 FileEnvironment::ReadInstruction(u32 offset) const { +u64 FileEnvironment::ReadInstruction(u32 offset) { if (offset % 8 != 0) { throw InvalidArgument("offset={} is not aligned to 8", offset); } @@ -39,4 +39,8 @@
u64 FileEnvironment::ReadInstruction(u32 offset) const { return data[offset / 8]; } +std::array<u32, 3> FileEnvironment::WorkgroupSize() { + return {1, 1, 1}; +} + } // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index c294bc6fa..b8c4bbadd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -12,7 +12,9 @@ public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; - u64 ReadInstruction(u32 offset) const override; + u64 ReadInstruction(u32 offset) override; + + std::array<u32, 3> WorkgroupSize() override; private: std::vector<u64> data; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7d..ec029dfd6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5..8709a2ea1 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; +void MOV(TranslatorVisitor& v, u64 insn, const
IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 1610bb34e..050a37f18 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; + void(Backend::SPIRV::EmitSPIRV(env, program)); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..c96d783b7 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +struct Profile { + bool unified_descriptor_binding; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 000000000..b25081e39 --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) { + ObjectPool<Maxwell::Flow::Block> flow_block_pool; + ObjectPool<IR::Inst> inst_pool; + ObjectPool<IR::Block> block_pool; + + Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; +} + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h new file mode 100644 index 000000000..4cb973878 --- /dev/null +++ b/src/shader_recompiler/recompiler.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader { + +[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address); + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706..3323e6916 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included.
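With recompiler.h in place, the whole frontend-to-SPIR-V pipeline is reachable through a single call. A minimal driver sketch follows; the shader.bin path and the start address of 0 are illustrative assumptions, not part of this patch:

    #include <cstdio>

    #include "shader_recompiler/file_environment.h"
    #include "shader_recompiler/recompiler.h"

    int main() {
        // FileEnvironment implements Environment on top of a raw instruction dump
        Shader::FileEnvironment env{"shader.bin"}; // hypothetical input file
        // First element is the collected shader info, second the SPIR-V words
        auto [info, code] = Shader::RecompileSPIRV(env, 0); // assumed entry point 0
        std::printf("emitted %zu SPIR-V words\n", code.size());
    }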
- -#pragma once - -#include <array> -#include <bitset> -#include <optional> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
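The TODO is accurate: the standard only permits specializing a std trait when the specialization meets the primary template's requirements, and std::make_unsigned requires an integral or enum type, so the class-type specializations below are technically undefined behavior. A conforming alternative would be a project-local trait; this is a hypothetical sketch, not part of the patch, and it would also require bit_field.h to consult the project trait instead of std::make_unsigned:

    #include <type_traits>

    namespace Tegra::Shader {

    // Integral and enum types defer to the standard trait...
    template <typename T>
    struct MakeUnsigned : std::make_unsigned<T> {};

    // ...while the bitfield wrapper types map to themselves, mirroring the
    // std specializations this would replace.
    template <>
    struct MakeUnsigned<Register> {
        using type = Register;
    };

    template <>
    struct MakeUnsigned<Attribute> {
        using type = Attribute;
    };

    } // namespace Tegra::Shader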
-template <> -struct make_unsigned<Tegra::Shader::Attribute> { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned<Tegra::Shader::Register> { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
-}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - 
WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> 
abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. 
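// (An f16 is 1 sign, 5 exponent and 10 mantissa bits; each 9-bit immediate
// appears to hold the exponent and upper mantissa, so shifting it left by 6
// re-aligns it within its 16-bit half, with the sign supplied separately by
// first_negate/second_negate. For example, first = 0xF0 packs to
// 0xF0 << 6 = 0x3C00, the f16 encoding of 1.0.)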
- constexpr u32 imm_shift = 6; - return static_cast((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - 
[[nodiscard]] s32 GetImmediateOffset() const { - return static_cast(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - 
BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> 
ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. 
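// Rather than separate fields, the 4-bit texture_info value packs the type,
// array and depth-compare state at once; the checks below recover, roughly:
//   0      -> 1D
//   1..9   -> 2D (7..9 are the array variants; 4..6 and 9 imply depth compare)
//   10..11 -> 3D
//   12..13 -> Cube
// with the LOD mode (LZ/LL/None) inferred from the same value further down.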
- if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. 
- if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. 
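// Worked example: a raw target of 0xFFFFF0 sign-extends via
// (0xFFFFF0 ^ 0x800000) - 0x800000 to -16 bytes; dividing by
// sizeof(Instruction) == 8 gives -2, and adding 1 (the offset is relative to
// the instruction after the branch) yields -1, one instruction before the
// branch.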
- return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. 
- BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - 
-class OpCode {
-public:
-    enum class Id {
-        KIL,
-        SSY,
-        SYNC,
-        BRK,
-        DEPBAR,
-        VOTE,
-        VOTE_VTG,
-        SHFL,
-        FSWZADD,
-        BFE_C,
-        BFE_R,
-        BFE_IMM,
-        BFI_RC,
-        BFI_IMM_R,
-        BRA,
-        BRX,
-        PBK,
-        LD_A,
-        LD_L,
-        LD_S,
-        LD_C,
-        LD,    // Load from generic memory
-        LDG,   // Load from global memory
-        ST_A,
-        ST_L,
-        ST_S,
-        ST,    // Store in generic memory
-        STG,   // Store in global memory
-        RED,   // Reduction operation
-        ATOM,  // Atomic operation on global memory
-        ATOMS, // Atomic operation on shared memory
-        AL2P,  // Transforms attribute memory into physical memory
-        TEX,
-        TEX_B,  // Texture Load Bindless
-        TXQ,    // Texture Query
-        TXQ_B,  // Texture Query Bindless
-        TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
-        TLD,    // Texture Load
-        TLDS,   // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,   // Texture Gather 4
-        TLD4_B, // Texture Gather 4 Bindless
-        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
-        TMML_B, // Texture Mip Map Level
-        TMML,   // Texture Mip Map Level
-        TXD,    // Texture Gradient/Load with Derivates
-        TXD_B,  // Texture Gradient/Load with Derivates Bindless
-        SUST,   // Surface Store
-        SULD,   // Surface Load
-        SUATOM, // Surface Atomic Operation
-        EXIT,
-        NOP,
-        IPA,
-        OUT_R, // Emit vertex/primitive
-        ISBERD,
-        BAR,
-        MEMBAR,
-        VMAD,
-        VSETP,
-        VMNMX,
-        FFMA_IMM, // Fused Multiply and Add
-        FFMA_CR,
-        FFMA_RC,
-        FFMA_RR,
-        FADD_C,
-        FADD_R,
-        FADD_IMM,
-        FADD32I,
-        FMUL_C,
-        FMUL_R,
-        FMUL_IMM,
-        FMUL32_IMM,
-        IADD_C,
-        IADD_R,
-        IADD_IMM,
-        IADD3_C, // Add 3 Integers
-        IADD3_R,
-        IADD3_IMM,
-        IADD32I,
-        ISCADD_C, // Scale and Add
-        ISCADD_R,
-        ISCADD_IMM,
-        FLO_R,
-        FLO_C,
-        FLO_IMM,
-        LEA_R1,
-        LEA_R2,
-        LEA_RZ,
-        LEA_IMM,
-        LEA_HI,
-        HADD2_C,
-        HADD2_R,
-        HADD2_IMM,
-        HMUL2_C,
-        HMUL2_R,
-        HMUL2_IMM,
-        HFMA2_CR,
-        HFMA2_RC,
-        HFMA2_RR,
-        HFMA2_IMM_R,
-        HSETP2_C,
-        HSETP2_R,
-        HSETP2_IMM,
-        HSET2_C,
-        HSET2_R,
-        HSET2_IMM,
-        POPC_C,
-        POPC_R,
-        POPC_IMM,
-        SEL_C,
-        SEL_R,
-        SEL_IMM,
-        ICMP_RC,
-        ICMP_R,
-        ICMP_CR,
-        ICMP_IMM,
-        FCMP_RR,
-        FCMP_RC,
-        FCMP_IMMR,
-        MUFU,  // Multi-Function Operator
-        RRO_C, // Range Reduction Operator
-        RRO_R,
-        RRO_IMM,
-        F2F_C,
-        F2F_R,
-        F2F_IMM,
-        F2I_C,
-        F2I_R,
-        F2I_IMM,
-        I2F_C,
-        I2F_R,
-        I2F_IMM,
-        I2I_C,
-        I2I_R,
-        I2I_IMM,
-        LOP_C,
-        LOP_R,
-        LOP_IMM,
-        LOP32I,
-        LOP3_C,
-        LOP3_R,
-        LOP3_IMM,
-        MOV_C,
-        MOV_R,
-        MOV_IMM,
-        S2R,
-        MOV32_IMM,
-        SHL_C,
-        SHL_R,
-        SHL_IMM,
-        SHR_C,
-        SHR_R,
-        SHR_IMM,
-        SHF_RIGHT_R,
-        SHF_RIGHT_IMM,
-        SHF_LEFT_R,
-        SHF_LEFT_IMM,
-        FMNMX_C,
-        FMNMX_R,
-        FMNMX_IMM,
-        IMNMX_C,
-        IMNMX_R,
-        IMNMX_IMM,
-        FSETP_C, // Set Predicate
-        FSETP_R,
-        FSETP_IMM,
-        FSET_C,
-        FSET_R,
-        FSET_IMM,
-        ISETP_C,
-        ISETP_IMM,
-        ISETP_R,
-        ISET_R,
-        ISET_C,
-        ISET_IMM,
-        PSETP,
-        PSET,
-        CSETP,
-        R2P_IMM,
-        P2R_IMM,
-        XMAD_IMM,
-        XMAD_CR,
-        XMAD_RC,
-        XMAD_RR,
-    };
-
-    enum class Type {
-        Trivial,
-        Arithmetic,
-        ArithmeticImmediate,
-        ArithmeticInteger,
-        ArithmeticIntegerImmediate,
-        ArithmeticHalf,
-        ArithmeticHalfImmediate,
-        Bfe,
-        Bfi,
-        Shift,
-        Ffma,
-        Hfma2,
-        Flow,
-        Synch,
-        Warp,
-        Memory,
-        Texture,
-        Image,
-        FloatSet,
-        FloatSetPredicate,
-        IntegerSet,
-        IntegerSetPredicate,
-        HalfSet,
-        HalfSetPredicate,
-        PredicateSetPredicate,
-        PredicateSetRegister,
-        RegisterSetPredicate,
-        Conversion,
-        Video,
-        Xmad,
-        Unknown,
-    };
-
-    /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
-    /// conditionally executed).
-    [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
-        // TODO(Subv): Add the rest of unpredicated instructions.
-        return opcode != Id::SSY && opcode != Id::PBK;
-    }
-
-    class Matcher {
-    public:
-        constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
-            : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
-
-        [[nodiscard]] constexpr const char* GetName() const {
-            return name;
-        }
-
-        [[nodiscard]] constexpr u16 GetMask() const {
-            return mask;
-        }
-
-        [[nodiscard]] constexpr Id GetId() const {
-            return id;
-        }
-
-        [[nodiscard]] constexpr Type GetType() const {
-            return type;
-        }
-
-        /**
-         * Tests to see if the given instruction is the instruction this matcher represents.
-         * @param instruction The instruction to test
-         * @returns true if the given instruction matches.
-         */
-        [[nodiscard]] constexpr bool Matches(u16 instruction) const {
-            return (instruction & mask) == expected;
-        }
-
-    private:
-        const char* name;
-        u16 mask;
-        u16 expected;
-        Id id;
-        Type type;
-    };
-
-    using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
-    [[nodiscard]] static DecodeResult Decode(Instruction instr) {
-        static const auto table{GetDecodeTable()};
-
-        const auto matches_instruction = [instr](const auto& matcher) {
-            return matcher.Matches(static_cast<u16>(instr.opcode));
-        };
-
-        auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
-        return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
-                                   : std::nullopt;
-    }
-
-private:
-    struct Detail {
-    private:
-        static constexpr std::size_t opcode_bitsize = 16;
-
-        /**
-         * Generates the mask and the expected value after masking from a given bitstring.
-         * A '0' in a bitstring indicates that a zero must be present at that bit position.
-         * A '1' in a bitstring indicates that a one must be present at that bit position.
-         */
-        [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
-            u16 mask = 0, expect = 0;
-            for (std::size_t i = 0; i < opcode_bitsize; i++) {
-                const std::size_t bit_position = opcode_bitsize - i - 1;
-                switch (bitstring[i]) {
-                case '0':
-                    mask |= static_cast<u16>(1U << bit_position);
-                    break;
-                case '1':
-                    expect |= static_cast<u16>(1U << bit_position);
-                    mask |= static_cast<u16>(1U << bit_position);
-                    break;
-                default:
-                    // Ignore
-                    break;
-                }
-            }
-            return std::make_pair(mask, expect);
-        }
-
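GetMaskAndExpect above compiles a pattern such as "1110111011010---" into a (mask, expected) pair, so matching an opcode against a table entry is a single AND plus compare; the stable sort applied to the table further down then keeps the matchers with more constrained bits first. A self-contained sketch of the same scheme (simplified types, not the exact yuzu code):

#include <cstddef>
#include <cstdint>
#include <utility>

// '0' and '1' constrain a bit; '-' leaves it unconstrained.
constexpr std::pair<std::uint16_t, std::uint16_t> CompilePattern(const char* pattern) {
    std::uint16_t mask = 0;
    std::uint16_t expected = 0;
    for (std::size_t i = 0; i < 16; ++i) {
        const auto bit = static_cast<std::uint16_t>(1U << (15 - i));
        if (pattern[i] == '0') {
            mask |= bit;
        } else if (pattern[i] == '1') {
            mask |= bit;
            expected |= bit;
        }
    }
    return {mask, expected};
}

constexpr bool Matches(std::uint16_t opcode, const char* pattern) {
    const auto compiled = CompilePattern(pattern);
    return (opcode & compiled.first) == compiled.second;
}

static_assert(Matches(0b1110111011010000, "1110111011010---"));  // LDG-shaped opcode
static_assert(!Matches(0b1110111011011000, "1110111011010---"));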
-    public:
-        /// Creates a matcher that can match and parse instructions based on bitstring.
-        [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
-                                                       Type type, const char* const name) {
-            const auto [mask, expected] = GetMaskAndExpect(bitstring);
-            return Matcher(name, mask, expected, op, type);
-        }
-    };
-
-    [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
-        std::vector<Matcher> table = {
-#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
-            INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
-            INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
-            INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
-            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
-            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
-            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
-            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
-            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
-            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
-            INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
-            INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
-            INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
-            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
-            INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
-            INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
-            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
-            INST("100-------------", Id::LD, Type::Memory, "LD"),
-            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
-            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
-            INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
-            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
-            INST("101-------------", Id::ST, Type::Memory, "ST"),
-            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("1110101111111---", Id::RED, Type::Memory, "RED"),
-            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
-            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
-            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
-            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
-            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
-            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
-            INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
-            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
-            INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
-            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
-            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
-            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
-            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
-            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
-            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
-            INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
-            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
-            INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
-            INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
-            INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
-            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
-            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
-            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
-            INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
-            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
-            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
-
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, 
Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - 
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
-
-#pragma once
-
-#include <array>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-enum class OutputTopology : u32 {
-    PointList = 1,
-    LineStrip = 6,
-    TriangleStrip = 7,
-};
-
-enum class PixelImap : u8 {
-    Unused = 0,
-    Constant = 1,
-    Perspective = 2,
-    ScreenLinear = 3,
-};
-
-// Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
-struct Header {
-    union {
-        BitField<0, 5, u32> sph_type;
-        BitField<5, 5, u32> version;
-        BitField<10, 4, u32> shader_type;
-        BitField<14, 1, u32> mrt_enable;
-        BitField<15, 1, u32> kills_pixels;
-        BitField<16, 1, u32> does_global_store;
-        BitField<17, 4, u32> sass_version;
-        BitField<21, 5, u32> reserved;
-        BitField<26, 1, u32> does_load_or_store;
-        BitField<27, 1, u32> does_fp64;
-        BitField<28, 4, u32> stream_out_mask;
-    } common0;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_low_size;
-        BitField<24, 8, u32> per_patch_attribute_count;
-    } common1;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_high_size;
-        BitField<24, 8, u32> threads_per_input_primitive;
-    } common2;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_crs_size;
-        BitField<24, 4, OutputTopology> output_topology;
-        BitField<28, 4, u32> reserved;
-    } common3;
-
-    union {
-        BitField<0, 12, u32> max_output_vertices;
-        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<20, 4, u32> reserved;
-        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
-    } common4;
-
-    union {
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            union {
-                BitField<0, 8, u16> clip_distances;
-                BitField<8, 1, u16> point_sprite_s;
-                BitField<9, 1, u16> point_sprite_t;
-                BitField<10, 1, u16> fog_coordinate;
-                BitField<12, 1, u16> tessellation_eval_point_u;
-                BitField<13, 1, u16> tessellation_eval_point_v;
-                BitField<14, 1, u16> instance_id;
-                BitField<15, 1, u16> vertex_id;
-            };
-            INSERT_PADDING_BYTES_NOINIT(5);  // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapReserved
-            INSERT_PADDING_BYTES_NOINIT(3);  // OmapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(5);  // OmapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapReserved
-        } vtg;
-
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
-
-            union {
-                BitField<0, 2, PixelImap> x;
-                BitField<2, 2, PixelImap> y;
-                BitField<4, 2, PixelImap> z;
-                BitField<6, 2, PixelImap> w;
-                u8 raw;
-            } imap_generic_vector[32];
-
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapReserved
-
-            struct {
-                u32 target;
-                union {
-                    BitField<0, 1, u32> sample_mask;
-                    BitField<1, 1, u32> depth;
-                    BitField<2, 30, u32> reserved;
-                };
-            } omap;
-
-            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-                const u32 bit = render_target * 4 + component;
-                return omap.target & (1 << bit);
-            }
-
-            PixelImap GetPixelImap(u32 attribute) const {
-                const auto get_index = [this, attribute](u32 index) {
-                    return static_cast<PixelImap>(
-                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
-                };
-
-                std::optional<PixelImap> result;
-                for (u32 component = 0; component < 4; ++component) {
-                    const PixelImap index = get_index(component);
-                    if (index == PixelImap::Unused) {
-                        continue;
-                    }
-                    if (result && result != index) {
-                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
-                    }
-                    result = index;
-                }
-                return result.value_or(PixelImap::Unused);
-            }
-        } ps;
-
-        std::array<u32, 0x14> raw;
-    };
-
-    u64 GetLocalMemorySize() const {
-        return (common1.shader_local_memory_low_size |
-                (common2.shader_local_memory_high_size << 24));
-    }
-};
-static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
-
-} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 7a3660496..588ce6139 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -4,6 +4,9 @@
 
 #include
+#include <boost/container/small_vector.hpp>
+
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -13,9 +16,142 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
+namespace {
+vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
+    boost::container::small_vector bindings;
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    return device.GetLogical().CreateDescriptorSetLayout({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(bindings.size()),
+        .pBindings = bindings.data(),
+    });
+}
 
-ComputePipeline::ComputePipeline() = default;
+vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
+    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
+    VkPipelineLayout pipeline_layout) {
+    boost::container::small_vector entries;
+    size_t offset{};
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+        .pDescriptorUpdateEntries = entries.data(),
+        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
+        .descriptorSetLayout = descriptor_set_layout,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
+        .pipelineLayout = pipeline_layout,
+        .set = 0,
+    });
+}
+} // Anonymous namespace
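Every entry pushed above advances .offset by sizeof(DescriptorUpdateEntry), so the template describes one tightly packed payload holding exactly one descriptor record per binding. A hedged sketch of the consuming side (the DescriptorUpdateEntry layout here is an assumption for illustration; the real type is defined elsewhere in yuzu):

#include <vulkan/vulkan.h>

// Hypothetical packed payload layout: one record per binding, in the same
// order the template entries were declared (uniform buffers, then storage).
union DescriptorUpdateEntrySketch {
    VkDescriptorBufferInfo buffer;
    VkDescriptorImageInfo image;
};

// One call writes every binding of the set; the driver walks the template
// entries and reads each descriptor at its recorded offset and stride.
void UpdateSet(VkDevice device, VkDescriptorSet set, VkDescriptorUpdateTemplateKHR tmpl,
               const DescriptorUpdateEntrySketch* payload,
               PFN_vkUpdateDescriptorSetWithTemplateKHR update_with_template) {
    update_with_template(device, set, tmpl, payload);
}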
-ComputePipeline::~ComputePipeline() = default;
+ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                 const Shader::Info& info_, vk::ShaderModule spv_module_)
+    : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
+      spv_module(std::move(spv_module_)),
+      descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
+      descriptor_allocator(descriptor_pool, *descriptor_set_layout),
+      pipeline_layout{device.GetLogical().CreatePipelineLayout({
+          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .setLayoutCount = 1,
+          .pSetLayouts = descriptor_set_layout.address(),
+          .pushConstantRangeCount = 0,
+          .pPushConstantRanges = nullptr,
+      })},
+      descriptor_update_template{
+          CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
+      pipeline{device.GetLogical().CreateComputePipeline({
+          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .stage{
+              .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+              .pNext = nullptr,
+              .flags = 0,
+              .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+              .module = *spv_module,
+              .pName = "main",
+              .pSpecializationInfo = nullptr,
+          },
+          .layout = *pipeline_layout,
+          .basePipelineHandle = 0,
+          .basePipelineIndex = 0,
+      })} {}
+
+void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
+    u32 enabled_uniforms{};
+    for (const auto& desc : info.constant_buffer_descriptors) {
+        enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index;
+    }
+    buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms);
+
+    buffer_cache.UnbindComputeStorageBuffers();
+    size_t index{};
+    for (const auto& desc : info.storage_buffers_descriptors) {
+        ASSERT(desc.count == 1);
+        buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true);
+        ++index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+    buffer_cache.BindHostComputeBuffers();
+}
+
+VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
+    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
+    return descriptor_set;
+}
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 433d8bb3d..dc045d524 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -5,19 +5,52 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class Device;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
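ConfigureBufferCache above folds each descriptor's (index, count) range into a single uniform-buffer enable bitmask before handing it to the buffer cache. A small standalone illustration of the mask arithmetic (not yuzu code):

#include <cstdint>

// Enable `count` consecutive constant buffers starting at `index`.
constexpr std::uint32_t EnableMask(std::uint32_t index, std::uint32_t count) {
    return static_cast<std::uint32_t>(((1ULL << count) - 1) << index);
}

static_assert(EnableMask(0, 3) == 0b0000'0111);
static_assert(EnableMask(4, 2) == 0b0011'0000);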
-class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout 
layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 
0 : program - 1;
-}
+class Environment final : public Shader::Environment {
+public:
+    explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
+        : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
 
-ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
-    return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
-}
+    ~Environment() override = default;
 
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
-    switch (program) {
-    case Maxwell::ShaderProgram::VertexB:
-        return ShaderType::Vertex;
-    case Maxwell::ShaderProgram::TesselationControl:
-        return ShaderType::TesselationControl;
-    case Maxwell::ShaderProgram::TesselationEval:
-        return ShaderType::TesselationEval;
-    case Maxwell::ShaderProgram::Geometry:
-        return ShaderType::Geometry;
-    case Maxwell::ShaderProgram::Fragment:
-        return ShaderType::Fragment;
-    default:
-        UNIMPLEMENTED_MSG("program={}", program);
-        return ShaderType::Vertex;
+    [[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
+        const std::optional<u64> size{TryFindSize(start_address)};
+        if (!size) {
+            return std::nullopt;
+        }
+        cached_lowest = start_address;
+        cached_highest = start_address + static_cast<u32>(*size);
+        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
     }
-}
+
+    [[nodiscard]] size_t ShaderSize() const noexcept {
+        return read_highest - read_lowest + INST_SIZE;
+    }
+
+    [[nodiscard]] u128 ComputeHash() const {
+        const size_t size{ShaderSize()};
+        auto data = std::make_unique(size);
+        gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
+        return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
+    }
+
+    u64 ReadInstruction(u32 address) override {
+        read_lowest = std::min(read_lowest, address);
+        read_highest = std::max(read_highest, address);
+
+        if (address >= cached_lowest && address < cached_highest) {
+            return code[address / INST_SIZE];
+        }
+        return gpu_memory.Read<u64>(program_base + address);
+    }
+
+    std::array<u32, 3> WorkgroupSize() override {
+        const auto& qmd{kepler_compute.launch_description};
+        return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+    }
+
+private:
+    static constexpr size_t INST_SIZE = sizeof(u64);
+    static constexpr size_t BLOCK_SIZE = 0x1000;
+    static constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+
+    std::optional<u64> TryFindSize(u32 start_address) {
+        GPUVAddr guest_addr = program_base + start_address;
+        size_t offset = 0;
+        size_t size = BLOCK_SIZE;
+        while (size <= MAXIMUM_SIZE) {
+            code.resize(size / INST_SIZE);
+            u64* const data = code.data() + offset / INST_SIZE;
+            gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
+            for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
+                const u64 inst = data[i / INST_SIZE];
+                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
+                    return offset + i;
+                }
+            }
+            guest_addr += BLOCK_SIZE;
+            size += BLOCK_SIZE;
+            offset += BLOCK_SIZE;
+        }
+        return std::nullopt;
+    }
+
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+    GPUVAddr program_base;
+
+    u32 read_lowest = 0;
+    u32 read_highest = 0;
+
+    std::vector<u64> code;
+    u32 cached_lowest = std::numeric_limits<u32>::max();
+    u32 cached_highest = 0;
+};
 } // Anonymous namespace
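TryFindSize above discovers a shader's length by reading guest memory block by block and stopping at the first branch-to-self instruction, which Maxwell programs use as a terminator. The same scan reduced to a standalone sketch over an in-memory program (sentinel constants copied from the patch, block/grow logic omitted):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::size_t> FindShaderSize(const std::vector<std::uint64_t>& code) {
    constexpr std::uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
    constexpr std::uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
    for (std::size_t i = 0; i < code.size(); ++i) {
        if (code[i] == SELF_BRANCH_A || code[i] == SELF_BRANCH_B) {
            return i * sizeof(std::uint64_t); // byte offset of the terminator
        }
    }
    return std::nullopt; // no terminator in the scanned window
}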
 
 size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
 PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_)
-    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
+    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
       scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
                                                                     update_descriptor_queue_} {}
 
 PipelineCache::~PipelineCache() = default;
 
-ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
-    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
-    auto& entry = pair->second;
-    if (!is_cache_miss) {
-        return *entry;
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr shader_addr{program_base + qmd.program_start};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    if (!cpu_shader_addr) {
+        return nullptr;
     }
-    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-    throw "Bad";
+    ShaderInfo* const shader{TryGet(*cpu_shader_addr)};
+    if (!shader) {
+        return CreateComputePipelineWithoutShader(*cpu_shader_addr);
+    }
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)};
+    const auto [pair, is_new]{compute_cache.try_emplace(key)};
+    auto& pipeline{pair->second};
+    if (!is_new) {
+        return &pipeline;
+    }
+    pipeline = CreateComputePipeline(shader);
+    shader->compute_users.push_back(key);
+    return &pipeline;
 }
 
-void PipelineCache::OnShaderRemoval(Shader*) {}
+ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    Environment env{kepler_compute, gpu_memory, program_base};
+    if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
+        // TODO: Load from cache
+    }
+    const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
+    shader_info->unique_hash = env.ComputeHash();
+    shader_info->size_bytes = env.ShaderSize();
+    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
+                           BuildShader(device, code)};
+}
+
+ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) {
+    ShaderInfo shader;
+    ComputePipeline pipeline{CreateComputePipeline(&shader)};
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
+    shader.compute_users.push_back(key);
+    pipeline.AddRef();
+
+    const size_t size_bytes{shader.size_bytes};
+    Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
+    return &compute_cache.emplace(key, std::move(pipeline)).first->second;
+}
+
+ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const {
+    const auto& qmd{kepler_compute.launch_description};
+    return {
+        .unique_hash = unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
+        .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+    };
+}
+
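Hash and operator== above treat the cache key as raw bytes: CityHash over the whole struct, memcmp for equality. That is only sound while the key contains no padding bits. A compile-time guard one could keep next to such a struct, shown on a stand-in with the same field layout (illustrative, not part of the patch):

#include <cstdint>
#include <type_traits>

// 16 + 4 + 12 bytes with 8-byte alignment leaves no padding, so every bit the
// hash and memcmp see is a meaningful part of the key.
struct CacheKeySketch {
    std::uint64_t unique_hash[2];
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size[3];
};
static_assert(std::has_unique_object_representations_v<CacheKeySketch>);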
+void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
+    for (const ComputePipelineCacheKey& key : shader->compute_users) {
+        const auto it = compute_cache.find(key);
+        ASSERT(it != compute_cache.end());
+
+        Pipeline& pipeline = it->second;
+        if (pipeline.RemoveRef()) {
+            // Wait for the pipeline to be free of GPU usage before destroying it
+            scheduler.Wait(pipeline.UsageTick());
+            compute_cache.erase(it);
+        }
+    }
+}
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e3e63340d..eb35abc27 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    GPUVAddr shader;
+    u128 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
 
@@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class Shader {
-public:
-    explicit Shader();
-    ~Shader();
+struct ShaderInfo {
+    u128 unique_hash{};
+    size_t size_bytes{};
+    std::vector<ComputePipelineCacheKey> compute_users;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
 public:
     explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
                            Tegra::Engines::Maxwell3D& maxwell3d,
@@ -83,12 +83,18 @@ public:
                            VKUpdateDescriptorQueue& update_descriptor_queue);
     ~PipelineCache() override;
 
-    ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
 protected:
-    void OnShaderRemoval(Shader* shader) final;
+    void OnShaderRemoval(ShaderInfo* shader) override;
 
 private:
+    ComputePipeline CreateComputePipeline(ShaderInfo* shader);
+
+    ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
+
+    ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const;
+
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
@@ -99,13 +105,7 @@ private:
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 
-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
-
-    std::array last_shaders{};
-
-    std::mutex pipeline_cache;
-    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+    std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
 };
 
 } // namespace Vulkan
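OnShaderRemoval above drops one reference per cached user and only erases a pipeline once RemoveRef reaches zero, and even then stalls on the scheduler until the GPU has passed the pipeline's last usage tick. The lifetime protocol in isolation (simplified stand-ins, not the yuzu classes):

#include <cstddef>
#include <cstdint>

struct PipelineSketch {
    std::uint64_t usage_tick = 0; // last semaphore tick that used this pipeline
    std::size_t ref_count = 0;
};

struct SchedulerSketch {
    void Wait(std::uint64_t tick) { /* block until the GPU reaches `tick` */ }
};

// Returns true when the caller may actually destroy the pipeline.
bool Release(PipelineSketch& pipeline, SchedulerSketch& scheduler) {
    if (--pipeline.ref_count != 0) {
        return false; // other shaders still reference this pipeline
    }
    scheduler.Wait(pipeline.usage_tick); // never free work the GPU may still run
    return true;
}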
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f152297d9..b757454c4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,8 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#pragma optimize("", off)
+
 namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() {
 }
 
 void RasterizerVulkan::DispatchCompute() {
-    UNREACHABLE_MSG("Not implemented");
+    ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
+    if (!pipeline) {
+        return;
+    }
+    std::scoped_lock lock{buffer_cache.mutex};
+    update_descriptor_queue.Acquire();
+    pipeline->ConfigureBufferCache(buffer_cache);
+    const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
+
+    const auto& qmd{kepler_compute.launch_description};
+    const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
+    const VkPipeline pipeline_handle{pipeline->Handle()};
+    const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
+    scheduler.Record(
+        [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
+                                      descriptor_set, nullptr);
+            cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
+        });
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 31017dc2b..3fd03b915 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,7 +21,6 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -150,8 +149,6 @@ private:
     BlitImageHelper blit_image;
     ASTCDecoderPass astc_decoder_pass;
 
-    GraphicsPipelineCacheKey graphics_key;
-
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
 namespace Vulkan {
 
 ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
-    : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
-
-ResourcePool::~ResourcePool() = default;
+    : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
 
 size_t ResourcePool::CommitResource() {
     // Refresh semaphore to query updated results
-    master_semaphore.Refresh();
-    const u64 gpu_tick = master_semaphore.KnownGpuTick();
+    master_semaphore->Refresh();
+    const u64 gpu_tick = master_semaphore->KnownGpuTick();
     const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
         for (size_t iterator = begin; iterator < end; ++iterator) {
             if (gpu_tick >= ticks[iterator]) {
-                ticks[iterator] = master_semaphore.CurrentTick();
+                ticks[iterator] = master_semaphore->CurrentTick();
                 return iterator;
             }
         }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
         // Both searches failed, the pool is full; handle it.
         const size_t free_resource = ManageOverflow();
 
-        ticks[free_resource] = master_semaphore.CurrentTick();
+        ticks[free_resource] = master_semaphore->CurrentTick();
         found = free_resource;
     }
 }
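CommitResource above recycles pooled resources by comparing each slot's recorded tick against the semaphore's known GPU progress, growing the pool only when nothing is free. The claim-and-stamp step reduced to a sketch (no hint iterator or grow logic; stand-in types, not the yuzu classes):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// A slot is reusable once the GPU has passed the tick stored at its last use;
// claiming it stamps the current (still pending) tick, as CommitResource does.
std::optional<std::size_t> TryClaimSlot(std::vector<std::uint64_t>& ticks,
                                        std::uint64_t known_gpu_tick,
                                        std::uint64_t current_tick) {
    for (std::size_t i = 0; i < ticks.size(); ++i) {
        if (known_gpu_tick >= ticks[i]) {
            ticks[i] = current_tick;
            return i;
        }
    }
    return std::nullopt; // pool exhausted: caller grows it and retries
}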
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
  */
 class ResourcePool {
 public:
+    explicit ResourcePool() = default;
     explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
-    virtual ~ResourcePool();
+
+    virtual ~ResourcePool() = default;
+
+    ResourcePool& operator=(ResourcePool&&) noexcept = default;
+    ResourcePool(ResourcePool&&) noexcept = default;
+
+    ResourcePool& operator=(const ResourcePool&) = default;
+    ResourcePool(const ResourcePool&) = default;
 
 protected:
     size_t CommitResource();
@@ -34,7 +42,7 @@ private:
     /// Allocates a new page of resources.
     void Grow();
 
-    MasterSemaphore& master_semaphore;
+    MasterSemaphore* master_semaphore{};
     size_t grow_step = 0;     ///< Number of new resources created after an overflow
     size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
     std::vector<u64> ticks;   ///< Ticks for each resource

From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 19 Feb 2021 18:10:18 -0300
Subject: [PATCH 025/770] spirv: Add lower fp16 to fp32 pass

---
 src/shader_recompiler/CMakeLists.txt          |   2 +
 .../backend/spirv/emit_context.cpp            |  29 +++--
 .../backend/spirv/emit_context.h              |   6 +-
 .../backend/spirv/emit_spirv.cpp              |   2 +
 .../backend/spirv/emit_spirv.h                | 103 +++++++++----
 .../spirv/emit_spirv_bitwise_conversion.cpp   |  28 +++--
 .../backend/spirv/emit_spirv_composite.cpp    |  48 ++++----
 .../backend/spirv/emit_spirv_control_flow.cpp |   2 +-
 .../backend/spirv/emit_spirv_convert.cpp      |  89 +++++++++++++++
 .../spirv/emit_spirv_floating_point.cpp       |  48 ++++----
 .../backend/spirv/emit_spirv_integer.cpp      |  16 ---
 .../backend/spirv/emit_spirv_logical.cpp      |  72 +------------
 .../backend/spirv/emit_spirv_memory.cpp       |  22 +++-
 src/shader_recompiler/frontend/ir/condition.h |   2 +-
 .../frontend/ir/ir_emitter.cpp                |  70 ++++++------
 .../frontend/ir/microinstruction.cpp          |   4 +
 .../frontend/ir/microinstruction.h            |   2 +
 src/shader_recompiler/frontend/ir/opcodes.inc |   6 +-
 src/shader_recompiler/frontend/ir/program.cpp |   2 +-
 .../frontend/maxwell/program.cpp              |   2 +
 .../floating_point_conversion_integer.cpp     |  62 ++++++++---
 .../frontend/maxwell/translate/impl/impl.h    |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp       |  18 ++-
 .../ir_opt/constant_propagation_pass.cpp      |  12 +-
 .../ir_opt/lower_fp16_to_fp32.cpp             |  79 ++++++++++++++
 src/shader_recompiler/ir_opt/passes.h         |   1 +
 src/shader_recompiler/main.cpp                |  10 +-
 src/shader_recompiler/object_pool.h           |   2 +-
 .../renderer_vulkan/vk_compute_pass.cpp       |   3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   8 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |   2 -
 .../vulkan_common/vulkan_device.cpp           |  10 +-
 32 files changed, 479 insertions(+), 285 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
 create mode 100644 src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index b56bdd3d9..6047f3ebe 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -7,6 +7,7 @@ add_library(shader_recompiler STATIC
backend/spirv/emit_spirv_composite.cpp backend/spirv/emit_spirv_context_get_set.cpp backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp @@ -82,6 +83,7 @@ add_library(shader_recompiler STATIC ir_opt/dead_code_elimination_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp + ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 770067d98..ea1c8a3be 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -30,8 +30,11 @@ EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { DefineCommonTypes(program.info); DefineCommonConstants(); DefineSpecialVariables(program.info); - DefineConstantBuffers(program.info); - DefineStorageBuffers(program.info); + + u32 binding{}; + DefineConstantBuffers(program.info, binding); + DefineStorageBuffers(program.info, binding); + DefineLabels(program); } @@ -58,6 +61,12 @@ void EmitContext::DefineCommonTypes(const Info& info) { U1 = Name(TypeBool(), "u1"); + // TODO: Conditionally define these + AddCapability(spv::Capability::Int16); + AddCapability(spv::Capability::Int64); + U16 = Name(TypeInt(16, false), "u16"); + U64 = Name(TypeInt(64, false), "u64"); + F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); @@ -95,12 +104,12 @@ void EmitContext::DefineSpecialVariables(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info) { +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; - Decorate(array_type, spv::Decoration::ArrayStride, 16U); + Decorate(array_type, spv::Decoration::ArrayStride, 4U); const Id struct_type{TypeStruct(array_type)}; Name(struct_type, "cbuf_block"); @@ -111,18 +120,19 @@ void EmitContext::DefineConstantBuffers(const Info& info) { const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); + index += desc.count; binding += desc.count; } } -void EmitContext::DefineStorageBuffers(const Info& info) { +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { if (info.storage_buffers_descriptors.empty()) { return; } @@ -140,13 +150,14 @@ void EmitContext::DefineStorageBuffers(const Info& info) { const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, 
spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", binding)); - std::fill_n(ssbos.data() + binding, desc.count, id); + Name(id, fmt::format("ssbo{}", index)); + std::fill_n(ssbos.data() + index, desc.count, id); + index += desc.count; binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c4b84759d..8de203da2 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -37,6 +37,8 @@ public: Id void_id{}; Id U1{}; + Id U16{}; + Id U64{}; VectorTypes F32; VectorTypes U32; VectorTypes F16; @@ -59,8 +61,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineSpecialVariables(const Info& info); - void DefineConstantBuffers(const Info& info); - void DefineStorageBuffers(const Info& info); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d59718435..4ce07c281 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,6 +14,8 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#pragma optimize("", off) + namespace Shader::Backend::SPIRV { namespace { template diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5813f51ff..2b59c0b72 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -79,26 +79,27 @@ void EmitWriteStorageU16(EmitContext& ctx); void EmitWriteStorageS16(EmitContext& ctx); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage128(EmitContext& ctx); -void EmitCompositeConstructU32x2(EmitContext& ctx); -void EmitCompositeConstructU32x3(EmitContext& ctx); -void EmitCompositeConstructU32x4(EmitContext& ctx); -void EmitCompositeExtractU32x2(EmitContext& ctx); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF16x2(EmitContext& ctx); void EmitCompositeConstructF16x3(EmitContext& ctx); void EmitCompositeConstructF16x4(EmitContext& ctx); -void EmitCompositeExtractF16x2(EmitContext& ctx); -void EmitCompositeExtractF16x3(EmitContext& ctx); -void EmitCompositeExtractF16x4(EmitContext& ctx); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); void 
EmitCompositeConstructF32x2(EmitContext& ctx); void EmitCompositeConstructF32x3(EmitContext& ctx); void EmitCompositeConstructF32x4(EmitContext& ctx); -void EmitCompositeExtractF32x2(EmitContext& ctx); -void EmitCompositeExtractF32x3(EmitContext& ctx); -void EmitCompositeExtractF32x4(EmitContext& ctx); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -116,11 +117,13 @@ void EmitBitCastF16U16(EmitContext& ctx); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); void EmitPackUint2x32(EmitContext& ctx); -void EmitUnpackUint2x32(EmitContext& ctx); -void EmitPackFloat2x16(EmitContext& ctx); -void EmitUnpackFloat2x16(EmitContext& ctx); -void EmitPackDouble2x32(EmitContext& ctx); -void EmitUnpackDouble2x32(EmitContext& ctx); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); @@ -159,18 +162,18 @@ void EmitFPLog2(EmitContext& ctx); void EmitFPSaturate16(EmitContext& ctx); void EmitFPSaturate32(EmitContext& ctx); void EmitFPSaturate64(EmitContext& ctx); -void EmitFPRoundEven16(EmitContext& ctx); -void EmitFPRoundEven32(EmitContext& ctx); -void EmitFPRoundEven64(EmitContext& ctx); -void EmitFPFloor16(EmitContext& ctx); -void EmitFPFloor32(EmitContext& ctx); -void EmitFPFloor64(EmitContext& ctx); -void EmitFPCeil16(EmitContext& ctx); -void EmitFPCeil32(EmitContext& ctx); -void EmitFPCeil64(EmitContext& ctx); -void EmitFPTrunc16(EmitContext& ctx); -void EmitFPTrunc32(EmitContext& ctx); -void EmitFPTrunc64(EmitContext& ctx); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); @@ -201,25 +204,25 @@ void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); void EmitLogicalXor(EmitContext& ctx); void EmitLogicalNot(EmitContext& ctx); -void EmitConvertS16F16(EmitContext& ctx); -void EmitConvertS16F32(EmitContext& ctx); -void EmitConvertS16F64(EmitContext& ctx); -void EmitConvertS32F16(EmitContext& ctx); -void EmitConvertS32F32(EmitContext& ctx); -void EmitConvertS32F64(EmitContext& ctx); -void EmitConvertS64F16(EmitContext& ctx); -void EmitConvertS64F32(EmitContext& ctx); -void EmitConvertS64F64(EmitContext& ctx); -void 
EmitConvertU16F16(EmitContext& ctx); -void EmitConvertU16F32(EmitContext& ctx); -void EmitConvertU16F64(EmitContext& ctx); -void EmitConvertU32F16(EmitContext& ctx); -void EmitConvertU32F32(EmitContext& ctx); -void EmitConvertU32F64(EmitContext& ctx); -void EmitConvertU64F16(EmitContext& ctx); -void EmitConvertU64F32(EmitContext& ctx); -void EmitConvertU64F64(EmitContext& ctx); -void EmitConvertU64U32(EmitContext& ctx); -void EmitConvertU32U64(EmitContext& ctx); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 49c200498..e0d1ba413 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -34,24 +34,32 @@ void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitUnpackUint2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } -void EmitPackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); } -void EmitUnpackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); } -void EmitPackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); } -void EmitUnpackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 348e4796d..c950854a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,28 +6,28 @@ namespace 
Shader::Backend::SPIRV { -void EmitCompositeConstructU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } -void EmitCompositeConstructU32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); } -void EmitCompositeConstructU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); } -void EmitCompositeExtractU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { - return ctx.OpCompositeExtract(ctx.U32[1], vector, index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -void EmitCompositeExtractU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } void EmitCompositeConstructF16x2(EmitContext&) { @@ -42,16 +42,16 @@ void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF16x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } void EmitCompositeConstructF32x2(EmitContext&) { @@ -66,16 +66,16 @@ void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } void EmitCompositeConstructF64x2(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6c4199664..48755b827 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -11,7 +11,7 @@ void EmitBranch(EmitContext& ctx, IR::Block* label) { } void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { + IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..76ccaffce --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index d24fbb353..9ef180531 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -169,52 +169,52 @@ void 
EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPRoundEven16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); } -void EmitFPRoundEven32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); } -void EmitFPRoundEven64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); } -void EmitFPFloor16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); } -void EmitFPFloor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); } -void EmitFPFloor64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); } -void EmitFPCeil16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F16[1], value); } -void EmitFPCeil32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); } -void EmitFPCeil64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); } -void EmitFPTrunc16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); } -void EmitFPTrunc32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); } -void EmitFPTrunc64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index a1d16b81e..22117a4ee 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -113,20 +113,4 @@ Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index ff2f4fb74..c5a07252f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,19 @@ namespace Shader::Backend::SPIRV { -void 
EmitConvertS16F16(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F32(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F64(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64U32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32U64(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 77d698ffd..808c1b401 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -94,8 +94,7 @@ void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset) { +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -129,8 +128,8 @@ void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, Id value) { +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -140,8 +139,19 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitWriteStorage64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + 
Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; + const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; + ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } void EmitWriteStorage128(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 16b4ae888..51c2f15cf 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f42489d41..559ab9cca 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) { F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPSaturate16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPSaturate32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); @@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPRoundEven16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPRoundEven32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); @@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPFloor16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPFloor32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); @@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPCeil16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPCeil32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); @@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPTrunc16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPTrunc32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); @@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& 
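For reference, the EmitWriteStorage64 hunk above lowers a 64-bit store onto a buffer typed as an array of u32: the value is split with OpCompositeExtract and written through two access chains at consecutive word indices. A minimal standalone model of that index arithmetic (the function and buffer here are illustrative, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <vector>

// Store a 64-bit value into a u32-typed buffer, low word first, mirroring
// the low_index/high_index pair built in EmitWriteStorage64.
void WriteStorage64(std::vector<uint32_t>& ssbo, uint32_t byte_offset, uint64_t value) {
    const uint32_t low_index = byte_offset / sizeof(uint32_t); // StorageIndex(ctx, offset, sizeof(u32))
    const uint32_t high_index = low_index + 1;                 // OpIAdd with the constant 1
    ssbo[low_index] = static_cast<uint32_t>(value);            // OpCompositeExtract ..., 0
    ssbo[high_index] = static_cast<uint32_t>(value >> 32);     // OpCompositeExtract ..., 1
}

int main() {
    std::vector<uint32_t> ssbo(4);
    WriteStorage64(ssbo, 8, 0xdeadbeef'cafebabeULL);
    std::printf("%x %x\n", ssbo[2], ssbo[3]); // prints: cafebabe deadbeef
}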
value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS64F64, value); default: ThrowInvalidType(value.Type()); @@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU64F64, value); default: ThrowInvalidType(value.Type()); @@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { case 64: switch (value.Type()) { case Type::U32: + return Inst(Opcode::ConvertU64U32, value); + case Type::U64: // Nothing to do return value; - case Type::U64: - return Inst(Opcode::ConvertU64U32, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ee76db9ad..d6a9be87d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) { } } +void Inst::ReplaceOpcode(IR::Opcode opcode) { + op = opcode; +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 5b244fa0b..321393dd7 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -86,6 +86,8 @@ public: void ReplaceUsesWith(Value replacement); + void ReplaceOpcode(IR::Opcode opcode); + template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ede5e20c2..50da77535 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc 
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -119,8 +119,10 @@ OPCODE(PackUint2x32, U64, U32x OPCODE(UnpackUint2x32, U32x2, U64, ) OPCODE(PackFloat2x16, U32, F16x2, ) OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 0ce99ef2a..8c301c3a1 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -35,4 +35,4 @@ std::string DumpProgram(const Program& program) { return ret; } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8c44ebb29..16cdc12e2 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool dest_format; BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; - BitField<39, 1, Rounding> rounding; + BitField<39, 2, Rounding> rounding; BitField<49, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; @@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) { } } +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; @@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { const size_t bitsize{BitSize(f2i.dest_format)}; const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; - v.X(f2i.dest_reg, result); + if (bitsize == 64) { + const IR::Value vector{v.ir.UnpackUint2x32(result)}; + v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); + v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + } else { + v.X(f2i.dest_reg, result); + } if (f2i.cc != 0) { - v.SetZFlag(v.ir.GetZeroFromOp(result)); - if (is_signed) { - v.SetSFlag(v.ir.GetSignFromOp(result)); - } else { - v.ResetSFlag(); - } - v.ResetCFlag(); - - // TODO: Investigate if out of bound conversions sets the overflow flag - v.ResetOFlag(); + throw NotImplementedException("F2I CC"); } } } // Anonymous namespace @@ -118,12 +137,25 @@ void 
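The UnpackCbuf helper above materializes an F64 operand from a single constant-buffer word: it fetches the word at offset + 4 (the upper half of the 8-byte slot), makes it element 1 of a {0, word} vector, and bit-packs that into a double, so the low 32 bits of the pattern are zero. A hypothetical standalone equivalent of that packing, assuming the element order of the U32x2 -> F64 opcode added to opcodes.inc (element 0 is the low word, as when EmitPackDouble2x32 lowers it to OpBitcast):

#include <bit>
#include <cstdint>
#include <cstdio>

// Combine two 32-bit words into the bit pattern of a double, low word first.
double PackDouble2x32(uint32_t low, uint32_t high) {
    const uint64_t bits = (static_cast<uint64_t>(high) << 32) | low;
    return std::bit_cast<double>(bits);
}

int main() {
    // With the low word zeroed, 0x40090000 in the high word is exactly 3.125.
    std::printf("%f\n", PackDouble2x32(0u, 0x40090000u));
}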
TranslatorVisitor::F2I_reg(u64 insn) { f2i.base.src_format.Value()); } }()}; - TranslateF2I(*this, insn, op_a); } -void TranslatorVisitor::F2I_cbuf(u64) { - throw NotImplementedException("{}", Opcode::F2I_cbuf); +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetCbufF(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); } void TranslatorVisitor::F2I_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8bd468244..27aba2cf8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { class TranslatorVisitor { public: - explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} Environment& env; IR::IREmitter ir; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f2326dea1..f7f102f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: - info.uses_fp16; + info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9eb61b54c..4d4e88259 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) { bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { /* * We are looking for this pattern: - * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) - * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) - * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) - * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) - * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) - * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * %rhs_bfe = BitFieldUExtract 
%factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul * * And replacing it with * %result = IMul32 %factor_a, %factor_b diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..c7032f168 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 89e5811d3..38106308c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); +void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span 
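The FoldXmadMultiply pattern documented earlier in this hunk rests on a simple identity: splitting %factor_a into 16-bit halves, multiplying each half by %factor_b, and recombining the partial products is an ordinary 32-bit multiply modulo 2^32. A standalone check of that identity (names are illustrative, not from the pass):

#include <cassert>
#include <cstdint>

// The four-instruction pattern the pass matches, written out in scalar code.
uint32_t XmadPattern(uint32_t a, uint32_t b) {
    const uint32_t rhs_mul = (a & 0xffff) * b;         // BitFieldUExtract a, #0, #16; IMul32
    const uint32_t lhs_mul = ((a >> 16) & 0xffff) * b; // BitFieldUExtract a, #16, #16; IMul32
    return (lhs_mul << 16) + rhs_mul;                  // ShiftLeftLogical32 #16; IAdd32
}

int main() {
    assert(XmadPattern(0x12345678u, 0x9abcdef0u) == 0x12345678u * 0x9abcdef0u);
    return 0;
}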
post_order_blocks); void VerificationPass(const IR::Function& function); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 050a37f18..abd44e323 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -67,8 +67,8 @@ int main() { ObjectPool inst_pool; ObjectPool block_pool; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - // FileEnvironment env{"D:\\Shaders\\shader.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\shader.bin"}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); flow_block_pool.ReleaseContents(); @@ -76,5 +76,9 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - void(Backend::SPIRV::EmitSPIRV(env, program)); + const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; + std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); + std::fclose(file); + std::system("spirv-dis D:\\shader.spv"); } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index f78813b5f..c10751b9d 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template - requires std::is_constructible_v [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v[[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..a444d55d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + /* + FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); + */ } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4bf3e4819..c2a41a360 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -31,8 +31,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); @@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { // TODO: Load from cache } const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-dis D:\\shader.spv"); + shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b757454c4..1b662f9f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,8 +36,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..85f903125 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = false, .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderFloat64 = true, + .shaderInt64 = true, + .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + // is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: [PATCH 026/770] shader: Add denorm flush support --- src/shader_recompiler/CMakeLists.txt | 5 +- .../backend/spirv/emit_spirv.cpp | 63 ++++++++++++++-- .../backend/spirv/emit_spirv.h | 4 +- .../spirv/emit_spirv_floating_point.cpp | 6 +- .../frontend/ir/ir_emitter.cpp | 32 ++++----- .../frontend/ir/ir_emitter.h | 8 +-- src/shader_recompiler/frontend/ir/modifiers.h | 23 +++--- .../floating_point_conversion_integer.cpp | 19 +++-- .../ir_opt/collect_shader_info_pass.cpp | 71 +++++++++++++++++-- .../global_memory_to_storage_buffer_pass.cpp | 1 - src/shader_recompiler/main.cpp | 13 +++- src/shader_recompiler/profile.h | 9 ++- src/shader_recompiler/recompiler.cpp | 5 +- src/shader_recompiler/recompiler.h | 4 +- src/shader_recompiler/shader_info.h | 7 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 7 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 +++- .../vulkan_common/vulkan_device.cpp | 26 ++++--- src/video_core/vulkan_common/vulkan_device.h | 33 ++++----- .../vulkan_common/vulkan_wrapper.cpp | 2 - 20 files changed, 260 insertions(+), 93 deletions(-) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h + frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_include_directories(shader_recompiler PRIVATE sirit) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) 
-target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit)
+target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit)
 
 add_executable(shader_util main.cpp)
 target_link_libraries(shader_util PRIVATE shader_recompiler)
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 4ce07c281..2519e446a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -14,8 +14,6 @@
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/program.h"
 
-#pragma optimize("", off)
-
 namespace Shader::Backend::SPIRV {
 namespace {
 template
@@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) {
         throw NotImplementedException("Phi node type {}", type);
     }
 }
+
+void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
+                        Id main_func) {
+    if (!profile.support_float_controls) {
+        return;
+    }
+    const Info& info{program.info};
+    if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve &&
+        !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) {
+        return;
+    }
+    ctx.AddExtension("SPV_KHR_float_controls");
+
+    if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
+        // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader");
+    } else if (info.uses_fp32_denorms_flush) {
+        if (profile.support_fp32_denorm_flush) {
+            ctx.AddCapability(spv::Capability::DenormFlushToZero);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
+        } else {
+            // Drivers will most likely flush denorms by default, no need to warn
+        }
+    } else if (info.uses_fp32_denorms_preserve) {
+        if (profile.support_fp32_denorm_preserve) {
+            ctx.AddCapability(spv::Capability::DenormPreserve);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
+        } else {
+            // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support");
+        }
+    }
+    if (!profile.support_separate_denorm_behavior) {
+        // No separate denorm behavior
+        return;
+    }
+    if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) {
+        // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader");
+    } else if (info.uses_fp16_denorms_flush) {
+        if (profile.support_fp16_denorm_flush) {
+            ctx.AddCapability(spv::Capability::DenormFlushToZero);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U);
+        } else {
+            // Same as fp32, no need to warn as most drivers will flush by default
+        }
+    } else if (info.uses_fp16_denorms_preserve) {
+        if (profile.support_fp16_denorm_preserve) {
+            ctx.AddCapability(spv::Capability::DenormPreserve);
+            ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U);
+        } else {
+            // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support");
+        }
+    }
+}
 } // Anonymous namespace
 
-std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) {
+std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) {
     EmitContext ctx{program};
     const Id void_function{ctx.TypeFunction(ctx.void_id)};
     // FIXME: Forward declare functions (needs sirit support)
@@ -131,10 +181,11 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) {
         ctx.OpFunctionEnd();
     }
     boost::container::small_vector interfaces;
-    if (program.info.uses_workgroup_id) {
+    const Info& info{program.info};
+    if (info.uses_workgroup_id) {
        interfaces.push_back(ctx.workgroup_id);
     }
-    if 
(program.info.uses_local_invocation_id) { + if (info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } const std::span interfaces_span(interfaces.data(), interfaces.size()); @@ -144,6 +195,8 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); + SetupDenormControl(profile, program, ctx, func); + return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -11,10 +11,12 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, + IR::Program& program); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::NoContraction); } switch (flags.rounding) { + case IR::FpRounding::DontCare: + break; case IR::FpRounding::RN: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); break; case IR::FpRounding::RM: ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); break; } - if (flags.fmz_mode != IR::FmzMode::FTZ) { - throw NotImplementedException("Denorm management not implemented"); - } return op; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, Flags{control}, 
value); case Type::F64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool 
no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
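The modifiers.h change above is what lets per-instruction FP behavior flow from the frontend to the backend: FpControl stays within 32 bits (see the static_assert) so each IR::Inst can carry it by value in its flags word, and the collect_shader_info pass below reads it back with inst.Flags<IR::FpControl>() to set the uses_fp*_denorms_* bits that SetupDenormControl later translates into SPV_KHR_float_controls execution modes (DenormFlushToZero for FTZ/FMZ, DenormPreserve for None, per width when the host reports independent denorm behavior). A minimal model of that round trip, assuming the flags word is copied with memcpy as in Inst::Flags:

#include <cstdint>
#include <cstdio>
#include <cstring>

enum class FpRounding : uint8_t { DontCare, RN, RM, RP, RZ };
enum class FmzMode : uint8_t { DontCare, FTZ, FMZ, None };

struct FpControl {
    bool no_contraction{false};
    FpRounding rounding{FpRounding::DontCare};
    FmzMode fmz_mode{FmzMode::DontCare};
};
static_assert(sizeof(FpControl) <= sizeof(uint32_t));

int main() {
    const FpControl in{.no_contraction = true, .rounding = FpRounding::RZ, .fmz_mode = FmzMode::FTZ};
    uint32_t flags = 0;
    std::memcpy(&flags, &in, sizeof(in));   // store into the instruction's u32 flags
    FpControl out;
    std::memcpy(&out, &flags, sizeof(out)); // Flags<FpControl>() read-back
    std::printf("%d\n", out.rounding == FpRounding::RZ && out.fmz_mode == FmzMode::FTZ);
}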
+#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -60,6 
+60,17 @@ void RunDatabase() { fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); } +static constexpr Profile PROFILE{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = true, + .support_separate_rounding_mode = true, + .support_fp16_denorm_preserve = true, + .support_fp32_denorm_preserve = true, + .support_fp16_denorm_flush = true, + .support_fp32_denorm_flush = true, +}; + int main() { // RunDatabase(); @@ -76,7 +87,7 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); std::fclose(file); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -7,7 +7,14 @@ namespace Shader { struct Profile { - bool unified_descriptor_binding; + bool unified_descriptor_binding{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -14,14 +14,15 @@ namespace Shader { -std::pair> RecompileSPIRV(Environment& env, u32 start_address) { +std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, + u32 start_address) { ObjectPool flow_block_pool; ObjectPool inst_pool; ObjectPool block_pool; Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -9,10 +9,12 @@ #include "common/common_types.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" namespace Shader { -[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); +[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, + Environment& env, u32 start_address); } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -31,14 +31,15 @@ struct Info { bool uses_local_invocation_id{}; bool uses_fp16{}; bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; u32 constant_buffer_mask{}; - std::array constant_buffers{}; boost::container::static_vector
constant_buffer_descriptors; - - std::array storage_buffers{}; boost::container::static_vector storage_buffers_descriptors; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice 
physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. 
- VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: [PATCH 027/770] shader: Rename, implement FADD.SAT and P2R (imm) --- src/shader_recompiler/CMakeLists.txt | 3 +- .../backend/spirv/emit_spirv.h | 40 +++++----- .../spirv/emit_spirv_floating_point.cpp | 58 ++++++-------- .../backend/spirv/emit_spirv_integer.cpp | 75 +++++++++++++------ .../backend/spirv/emit_spirv_select.cpp | 4 +- .../frontend/ir/ir_emitter.cpp | 16 ++-- src/shader_recompiler/frontend/ir/pred.h | 4 +- .../frontend/maxwell/program.cpp | 2 +- .../translate/impl/floating_point_add.cpp | 20 ++--- .../floating_point_conversion_integer.cpp | 2 +- .../floating_point_fused_multiply_add.cpp | 4 +- .../impl/floating_point_multiply.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 17 ++++- .../frontend/maxwell/translate/impl/impl.h | 7 +- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../impl/move_predicate_to_register.cpp | 66 ++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 --- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +- 18 files changed, 213 insertions(+), 127 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fbd4ec6dc..802527255 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -74,9 +74,10 @@ add_library(shader_recompiler STATIC 
frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp - frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/move_predicate_to_register.cpp frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp + frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index de624a151..922e294a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -110,7 +110,7 @@ void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); void EmitSelect8(EmitContext& ctx); void EmitSelect16(EmitContext& ctx); -void EmitSelect32(EmitContext& ctx); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitSelect64(EmitContext& ctx); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); @@ -130,9 +130,9 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx); -void EmitFPAbs32(EmitContext& ctx); -void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); @@ -146,9 +146,9 @@ void EmitFPMin64(EmitContext& ctx); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitFPNeg16(EmitContext& ctx); -void EmitFPNeg32(EmitContext& ctx); -void EmitFPNeg64(EmitContext& ctx); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); void EmitFPRecip32(EmitContext& ctx); void EmitFPRecip64(EmitContext& ctx); void EmitFPRecipSqrt32(EmitContext& ctx); @@ -161,9 +161,9 @@ void EmitFPExp2NotReduced(EmitContext& ctx); void EmitFPCos(EmitContext& ctx); void EmitFPCosNotReduced(EmitContext& ctx); void EmitFPLog2(EmitContext& ctx); -void EmitFPSaturate16(EmitContext& ctx); -void EmitFPSaturate32(EmitContext& ctx); -void EmitFPSaturate64(EmitContext& ctx); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); Id EmitFPRoundEven16(EmitContext& ctx, Id value); Id EmitFPRoundEven32(EmitContext& ctx, Id value); Id EmitFPRoundEven64(EmitContext& ctx, Id value); @@ -186,21 +186,21 @@ void EmitIAbs32(EmitContext& ctx); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); void EmitShiftRightLogical32(EmitContext& ctx); void EmitShiftRightArithmetic32(EmitContext& ctx); -void EmitBitwiseAnd32(EmitContext& ctx); -void EmitBitwiseOr32(EmitContext& ctx); -void EmitBitwiseXor32(EmitContext& ctx); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); +Id 
EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); void EmitBitFieldInsert(EmitContext& ctx); void EmitBitFieldSExtract(EmitContext& ctx); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitULessThan(EmitContext& ctx); -void EmitIEqual(EmitContext& ctx); -void EmitSLessThanEqual(EmitContext& ctx); -void EmitULessThanEqual(EmitContext& ctx); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitUGreaterThan(EmitContext& ctx); -void EmitINotEqual(EmitContext& ctx); -void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9687de37..47f87054b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -12,37 +12,21 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { if (flags.no_contraction) { ctx.Decorate(op, spv::Decoration::NoContraction); } - switch (flags.rounding) { - case IR::FpRounding::DontCare: - break; - case IR::FpRounding::RN: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); - break; - case IR::FpRounding::RM: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); - break; - case IR::FpRounding::RP: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); - break; - case IR::FpRounding::RZ: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); - break; - } return op; } } // Anonymous namespace -void EmitFPAbs16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); } -void EmitFPAbs32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); } -void EmitFPAbs64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); } Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { @@ -97,16 +81,16 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitFPNeg16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); } -void EmitFPNeg32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); } -void EmitFPNeg64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); } void EmitFPRecip32(EmitContext&) { @@ -157,16 +141,22 @@ void EmitFPLog2(EmitContext&) { 
throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPSaturate16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; + const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return ctx.OpFClamp(ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 22117a4ee..4c0b5990d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -7,10 +7,39 @@ namespace Shader::Backend::SPIRV { Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - if (inst->HasAssociatedPseudoOperation()) { - throw NotImplementedException("Pseudo-operations on IAdd32"); + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); } - return ctx.OpIAdd(ctx.U32[1], a, b); + if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); + } + if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) { + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); + } + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; } void EmitIAdd64(EmitContext&) { @@ -49,16 +78,16 @@ void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitwiseAnd32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) { + return 
ctx.OpBitwiseAnd(ctx.U32[1], a, b); } -void EmitBitwiseOr32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseOr(ctx.U32[1], a, b); } -void EmitBitwiseXor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseXor(ctx.U32[1], a, b); } void EmitBitFieldInsert(EmitContext&) { @@ -77,36 +106,36 @@ Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitULessThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); } -void EmitIEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); } -void EmitSLessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); } -void EmitULessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); } Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitUGreaterThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); } -void EmitINotEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); } -void EmitSGreaterThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); } Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 8d5062724..eb1926a4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -14,8 +14,8 @@ void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSelect32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); } void EmitSelect64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8f120a2f6..34c2f67fb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPAbs16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPAbs32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); @@ -481,11 +481,11 @@ F16F32F64 
IREmitter::FPAbs(const F16F32F64& value) { F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPNeg16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPNeg32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); @@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { F16F32F64 result{value}; if (abs) { - result = FPAbs(value); + result = FPAbs(result); } if (neg) { - result = FPNeg(value); + result = FPNeg(result); } return result; } diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index c6f2f82bf..4e7f32423 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -19,8 +19,8 @@ enum class Pred : u64 { PT, }; -constexpr size_t NUM_USER_PREDS = 6; -constexpr size_t NUM_PREDS = 7; +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { return static_cast(pred); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool src_a; } const fadd{insn}; - if (sat) { - throw NotImplementedException("FADD SAT"); - } if (cc) { throw NotImplementedException("FADD CC"); } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); } void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20F(insn)); + FADD(*this, insn, GetRegFloat20(insn)); } -void TranslatorVisitor::FADD_cbuf(u64) { - throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FADD_imm(u64) { - throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); } void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) { case SrcFormat::F16: return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; case SrcFormat::F32: - return GetCbufF(insn); + return GetFloatCbuf(insn); case SrcFormat::F64: { return UnpackCbuf(*this, insn); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); + FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20F(insn)); + return FMUL(*this, insn, GetRegFloat20(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { return ir.BitCast(GetCbuf(insn)); } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { return ir.Imm32(value); } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; + const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; + return ir.Imm32(value); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetReg20F(u64 insn); - [[nodiscard]] IR::F32 GetReg39F(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::F32 GetCbufF(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) { IADD(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::IADD_imm(u64) { - throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
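P2R, implemented below, merges predicate or condition-code state into the destination register: PR mode gathers the seven user predicates and CC mode the four Z/S/C/O flags, the gathered bits are shifted by 8 * byte_selector, the 20-bit immediate (shifted the same way) selects which destination bits are overwritten, and every other bit of the source register passes through. A standalone scalar model of that merge, assuming the predicate/flag bits have already been gathered into an integer (PackP2R and its parameter names are invented here for illustration, not part of the patch):

    // Hypothetical model: 'flags' holds the gathered predicate/CC bits,
    // 'mask' is the immediate, 'byte_selector' picks the lane, 'src' is the
    // pass-through source register.
    constexpr u32 PackP2R(u32 src, u32 flags, u32 mask, u32 byte_selector) {
        const u32 offset{byte_selector * 8};
        const u32 insert{(flags & mask) << offset};   // bits explicitly written
        return (src & ~(mask << offset)) | insert;    // untouched bits preserved
    }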
+ +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset{static_cast(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) { ThrowNotImplemented(Opcode::OUT_imm); } -void TranslatorVisitor::P2R_reg(u64) { - ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { - ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { - ThrowNotImplemented(Opcode::P2R_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - + /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); - + */ shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: [PATCH 028/770] spirv: Fixes and Intel specific workarounds --- .../backend/spirv/emit_context.cpp | 3 ++- .../backend/spirv/emit_context.h | 5 ++++- 
.../backend/spirv/emit_spirv.cpp | 6 +++--- src/shader_recompiler/backend/spirv/emit_spirv.h | 8 ++++---- .../backend/spirv/emit_spirv_floating_point.cpp | 13 ++++++++++--- .../backend/spirv/emit_spirv_logical.cpp | 16 ++++++++-------- .../frontend/ir/structured_control_flow.cpp | 3 --- .../frontend/maxwell/program.cpp | 3 --- .../frontend/maxwell/translate/impl/impl.cpp | 15 +++++++++------ src/shader_recompiler/profile.h | 3 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 11 files changed, 44 insertions(+), 32 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea1c8a3be..d2dbd56d4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -25,7 +25,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { +EmitContext::EmitContext(const Profile& profile_, IR::Program& program) + : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8de203da2..d20cf387e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { @@ -30,11 +31,13 @@ private: class EmitContext final : public Sirit::Module { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); + const Profile& profile; + Id void_id{}; Id U1{}; Id U16{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2519e446a..f3aca90d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -150,11 +150,11 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); } else { // Same as fp32, no need to warn as most drivers will flush by default } - } else if (info.uses_fp32_denorms_preserve) { + } else if (info.uses_fp16_denorms_preserve) { if (profile.support_fp16_denorm_preserve) { ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); @@ -166,7 +166,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{program}; + EmitContext ctx{profile, program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) Id func{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 922e294a7..cec80c13e 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -202,10 +202,10 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -void EmitLogicalOr(EmitContext& ctx); -void EmitLogicalAnd(EmitContext& ctx); -void EmitLogicalXor(EmitContext& ctx); -void EmitLogicalNot(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); Id EmitConvertS16F16(EmitContext& ctx, Id value); Id EmitConvertS16F32(EmitContext& ctx, Id value); Id EmitConvertS16F64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 47f87054b..5d0b74f9b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -15,6 +15,13 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { return op; } +Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} } // Anonymous namespace Id EmitFPAbs16(EmitContext& ctx, Id value) { @@ -144,19 +151,19 @@ void EmitFPLog2(EmitContext&) { Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F16[1], value, zero, one); } Id EmitFPSaturate32(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F32[1], value, zero, one); } Id EmitFPSaturate64(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; - return ctx.OpFClamp(ctx.F64[1], value, zero, one); + return Saturate(ctx, ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index c5a07252f..bb434def2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,20 +6,20 @@ namespace Shader::Backend::SPIRV { -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); } -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); } -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); } -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, 
value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index d145095d1..032ac8fda 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -272,11 +272,9 @@ public: explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); } - fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); } Statement& RootStatement() noexcept { @@ -548,7 +546,6 @@ private: size_t Offset(ConstNode stmt) const { size_t offset{0}; if (!SearchNode(root_stmt.children, stmt, offset)) { - fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); throw LogicError("Node not found in tree"); } return offset; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ed5dbf41f..dbfc04f75 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const s32 positive_value{static_cast(imm.value)}; - const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + if (imm.is_negative != 0) { + const s64 raw{static_cast<s64>(imm.value)}; + return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast<u32>(imm.value)); + } } IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { @@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; - const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + const u32 sign_bit{imm.is_negative != 0 ?
(1U << 31) : 0U}; + const u32 value{static_cast<u32>(imm.value) << 12}; + return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); } IR::U32 TranslatorVisitor::GetImm32(u64 insn) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9881bebab..917fc1251 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,9 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; + + // FClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b25af6cd3..2497c2385 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; /* From 18a766b3622baa40596490dbd4912f94e9980a76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 02:45:50 -0300 Subject: [PATCH 029/770] shader: Fix MOV(reg), add SHL variants and emit neg and abs instructions --- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++-- .../backend/spirv/emit_spirv_integer.cpp | 8 ++++---- .../maxwell/translate/impl/integer_shift_left.cpp | 8 ++++---- .../frontend/maxwell/translate/impl/move_register.cpp | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index cec80c13e..1b9be445e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -181,8 +181,8 @@ void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); void EmitISub64(EmitContext& ctx); Id EmitIMul32(EmitContext& ctx, Id a, Id b); -void EmitINeg32(EmitContext& ctx); -void EmitIAbs32(EmitContext& ctx); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); void EmitShiftRightLogical32(EmitContext& ctx); void EmitShiftRightArithmetic32(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 4c0b5990d..329dcb351 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -58,12 +58,12 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitINeg32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitINeg32(EmitContext& ctx, Id value) { + return ctx.OpSNegate(ctx.U32[1], value); } -void EmitIAbs32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIAbs32(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U32[1], value); } Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { diff --git
a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index b752785d4..d8a5158b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -56,12 +56,12 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { } } // Anonymous namespace -void TranslatorVisitor::SHL_reg(u64) { - throw NotImplementedException("SHL_reg"); +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::SHL_cbuf(u64) { - throw NotImplementedException("SHL_cbuf"); +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::SHL_imm(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index c3c4b9abd..6bb08db8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -26,7 +26,7 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - MOV(*this, insn, GetReg8(insn)); + MOV(*this, insn, GetReg20(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 22:59:16 -0300 Subject: [PATCH 030/770] shader: FMUL, select, RRO, and MUFU fixes --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 67 ++++-- .../spirv/emit_spirv_floating_point.cpp | 192 +++++++++++++++--- .../backend/spirv/emit_spirv_select.cpp | 21 +- .../frontend/ir/ir_emitter.cpp | 144 +++++++++++-- .../frontend/ir/ir_emitter.h | 18 +- src/shader_recompiler/frontend/ir/opcodes.inc | 50 ++++- .../maxwell/translate/impl/common_encoding.h | 3 +- .../translate/impl/floating_point_add.cpp | 2 +- .../floating_point_fused_multiply_add.cpp | 4 +- .../impl/floating_point_multi_function.cpp | 8 +- .../impl/floating_point_multiply.cpp | 42 ++-- .../impl/floating_point_range_reduction.cpp | 41 ++++ .../frontend/maxwell/translate/impl/impl.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.h | 5 +- .../translate/impl/integer_shift_left.cpp | 2 +- .../translate/impl/not_implemented.cpp | 12 -- .../ir_opt/constant_propagation_pass.cpp | 2 +- 18 files changed, 507 insertions(+), 119 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 802527255..5574feaa6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp + frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 1b9be445e..130c71996 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -108,10 +108,12 @@ void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); -void EmitSelect8(EmitContext& ctx); -void EmitSelect16(EmitContext& ctx); -Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitSelect64(EmitContext& ctx); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); @@ -149,18 +151,15 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPNeg16(EmitContext& ctx, Id value); Id EmitFPNeg32(EmitContext& ctx, Id value); Id EmitFPNeg64(EmitContext& ctx, Id value); -void EmitFPRecip32(EmitContext& ctx); -void EmitFPRecip64(EmitContext& ctx); -void EmitFPRecipSqrt32(EmitContext& ctx); -void EmitFPRecipSqrt64(EmitContext& ctx); -void EmitFPSqrt(EmitContext& ctx); -void EmitFPSin(EmitContext& ctx); -void EmitFPSinNotReduced(EmitContext& ctx); -void EmitFPExp2(EmitContext& ctx); -void EmitFPExp2NotReduced(EmitContext& ctx); -void EmitFPCos(EmitContext& ctx); -void EmitFPCosNotReduced(EmitContext& ctx); -void EmitFPLog2(EmitContext& ctx); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); @@ -176,6 +175,42 @@ Id EmitFPCeil64(EmitContext& ctx, Id value); Id EmitFPTrunc16(EmitContext& ctx, Id value); Id EmitFPTrunc32(EmitContext& ctx, Id value); Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& 
ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 5d0b74f9b..749f11742 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -100,52 +100,40 @@ Id EmitFPNeg64(EmitContext& ctx, Id value) { return ctx.OpFNegate(ctx.F64[1], value); } -void EmitFPRecip32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); } -void EmitFPRecip64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); } -void EmitFPRecipSqrt32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); } -void EmitFPRecipSqrt64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); } -void EmitFPSqrt(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value); } -void EmitFPSin(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); } -void EmitFPSinNotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); } -void EmitFPExp2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); } -void EmitFPExp2NotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitFPCos(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - 
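These lowerings are direct SPIR-V counterparts except for the reciprocals, which SPIR-V lacks; a worked trace of what a single MUFU now produces (register ids are illustrative):

// MUFU.RCP R2, R3  -- Maxwell ISA
//   IR:      %2 = FPRecip32 %3
//   SPIR-V:  %2 = OpFDiv %f32 %one %3      ; %one is OpConstant %f32 1.0
// MUFU.RSQ lowers to OpInverseSqrt; MUFU.SIN/COS/EX2/LG2 map one-to-one.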
-void EmitFPCosNotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitFPLog2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); } Id EmitFPSaturate16(EmitContext& ctx, Id value) { @@ -214,4 +202,148 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) { return ctx.OpTrunc(ctx.F64[1], value); } +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id 
EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index eb1926a4d..21cca4455 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,20 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitSelect8(EmitContext&) { +Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond, + [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSelect16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); } -Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); } -void EmitSelect64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 34c2f67fb..8ba86e614 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } -UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); } switch (true_value.Type()) { case Type::U8: - return 
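The Ord/Unord split only matters for NaN inputs; a self-contained reference model of the two equality flavours (plain C++, illustrative only, mirroring the SPIR-V semantics):

#include <cmath>

// Ordered: false if either operand is NaN. Unordered: true if either is NaN.
bool FOrdEqual(float lhs, float rhs) {
    return !std::isnan(lhs) && !std::isnan(rhs) && lhs == rhs;
}

bool FUnordEqual(float lhs, float rhs) {
    return std::isnan(lhs) || std::isnan(rhs) || lhs == rhs;
}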
Inst(Opcode::Select8, condition, true_value, false_value); + return Inst(Opcode::SelectU8, condition, true_value, false_value); case Type::U16: - return Inst(Opcode::Select16, condition, true_value, false_value); + return Inst(Opcode::SelectU16, condition, true_value, false_value); case Type::U32: - return Inst(Opcode::Select32, condition, true_value, false_value); + return Inst(Opcode::SelectU32, condition, true_value, false_value); case Type::U64: - return Inst(Opcode::Select64, condition, true_value, false_value); + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } @@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { return result; } -F32 IREmitter::FPCosNotReduced(const F32& value) { - return Inst(Opcode::FPCosNotReduced, value); +F32 IREmitter::FPCos(const F32& value) { + return Inst(Opcode::FPCos, value); } -F32 IREmitter::FPExp2NotReduced(const F32& value) { - return Inst(Opcode::FPExp2NotReduced, value); +F32 IREmitter::FPSin(const F32& value) { + return Inst(Opcode::FPSin, value); +} + +F32 IREmitter::FPExp2(const F32& value) { + return Inst(Opcode::FPExp2, value); } F32 IREmitter::FPLog2(const F32& value) { @@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) { F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecip32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecip64, value); default: ThrowInvalidType(value.Type()); @@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) { F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecipSqrt32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecipSqrt64, value); default: ThrowInvalidType(value.Type()); } } -F32 IREmitter::FPSinNotReduced(const F32& value) { - return Inst(Opcode::FPSinNotReduced, value); -} - F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } @@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { } } +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? 
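With Select now returning an untyped Value, callers re-wrap the result in the concrete type; a usage sketch (the operands are illustrative):

const IR::U1 take_first{ir.GetPred(IR::Pred::P0)};
const IR::F32 blended{ir.Select(take_first, ir.Imm32(1.0f), ir.Imm32(0.0f))};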
Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, + rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, + rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 + : Opcode::FPUnordGreaterThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? 
Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 959f4f9da..2c923716a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -98,7 +98,8 @@ public: const Value& e4); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); - [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); template [[nodiscard]] Dest BitCast(const Source& value); @@ -121,12 +122,12 @@ public: [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); - [[nodiscard]] F32 FPCosNotReduced(const F32& value); - [[nodiscard]] F32 FPExp2NotReduced(const F32& value); + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); - [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); @@ -134,6 +135,15 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 50da77535..f2d71144a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3, F64, F64x OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) // Select operations -OPCODE(Select8, U8, U1, U8, U8, ) -OPCODE(Select16, U16, U1, U16, U16, ) -OPCODE(Select32, U32, U1, U32, U32, ) -OPCODE(Select64, U64, U1, U64, U64, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) @@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32, 
F32, F32, OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) -OPCODE(FPSinNotReduced, F32, F32, ) OPCODE(FPExp2, F32, F32, ) -OPCODE(FPExp2NotReduced, F32, F32, ) OPCODE(FPCos, F32, F32, ) -OPCODE(FPCosNotReduced, F32, F32, ) OPCODE(FPLog2, F32, F32, ) OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) @@ -178,6 +177,43 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) + // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h index 3da37a2bb..fd73f656c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { case FmzMode::FTZ: return IR::FmzMode::FTZ; case FmzMode::FMZ: - return IR::FmzMode::FMZ; + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; case FmzMode::INVALIDFMZ3: break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 219ffcc6a..76a807d4e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetRegFloat20(insn)); + FADD(*this, insn, GetFloatReg20(insn)); } void TranslatorVisitor::FADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 758700d3c..c2ca0873b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index ba005fbf4..2f8605619 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { -enum class Operation { +enum class Operation : u64 { Cos = 0, Sin = 1, Ex2 = 2, // Base 2 exponent @@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) { IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: - return ir.FPCosNotReduced(op_a); + return ir.FPCos(op_a); case Operation::Sin: - return ir.FPSinNotReduced(op_a); + return ir.FPSin(op_a); case Operation::Ex2: - return ir.FPExp2NotReduced(op_a); + return ir.FPExp2(op_a); case Operation::Lg2: return ir.FPLog2(op_a); case Operation::Rcp: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 5c38d3fc1..edf2cadae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode if (cc) { throw NotImplementedException("FMUL CC"); } - if (sat) { - throw NotImplementedException("FMUL SAT"); - } IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { @@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); } void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; - } fmul{insn}; - + } const fmul{insn}; FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetRegFloat20(insn)); + return FMUL(*this, insn, GetFloatReg20(insn)); } -void TranslatorVisitor::FMUL_cbuf(u64) { - throw NotImplementedException("FMUL (cbuf)"); +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FMUL_imm(u64) { - throw NotImplementedException("FMUL (imm)"); +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FMUL32I(u64) { - throw NotImplementedException("FMUL32I"); +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); } } // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
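A reference model of the FMZ behaviour the FMUL path above emulates (FmzMul is an illustrative helper, not emitted code):

float FmzMul(float lhs, float rhs) {
    // D3D9-style multiply: a zero factor wins over NaN and infinity
    if (lhs == 0.0f || rhs == 0.0f) {
        return 0.0f;
    }
    return lhs * rhs; // denormal flushing is handled separately by the FTZ controls
}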
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 165d475b9..a5a0e1a9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) { return ir.Imm32(static_cast(imm.value)); } +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast(static_cast(imm.value))); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4d4cf2ebf..4e722e205 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,8 +304,8 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); @@ -314,6 +314,7 @@ public: [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d8a5158b5..20af68852 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { // const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; - result = 
v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; } v.X(shl.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 628cf1c14..4114e10be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -void TranslatorVisitor::RRO_reg(u64) { - ThrowNotImplemented(Opcode::RRO_reg); -} - -void TranslatorVisitor::RRO_cbuf(u64) { - ThrowNotImplemented(Opcode::RRO_cbuf); -} - -void TranslatorVisitor::RRO_imm(u64) { - ThrowNotImplemented(Opcode::RRO_imm); -} - void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d4e88259..ae3d5a7d6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -330,7 +330,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); - case IR::Opcode::Select32: + case IR::Opcode::SelectU32: return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); From 9d6a98d950da39dd2a7ca5ad25525de4fb825415 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Feb 2021 04:46:39 -0300 Subject: [PATCH 031/770] shader: Implement more of XMAD and FFMA32I and fix XMAD.CBCC --- .../backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_integer.cpp | 8 +-- .../floating_point_fused_multiply_add.cpp | 41 ++++++++++++---- .../impl/floating_point_multiply.cpp | 2 + .../impl/integer_short_multiply_add.cpp | 49 ++++++++++++++----- 5 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 130c71996..4b74cf04d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -224,8 +224,8 @@ void EmitShiftRightArithmetic32(EmitContext& ctx); Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); -void EmitBitFieldInsert(EmitContext& ctx); -void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 329dcb351..8aaa0e381 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -90,12 +90,12 @@ Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { return ctx.OpBitwiseXor(ctx.U32[1], a, b); } -void EmitBitFieldInsert(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id 
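The wrapped Select above exists because the hardware saturates out-of-range shift counts; a reference model of the semantics being preserved (MaxwellShl is illustrative):

u32 MaxwellShl(u32 base, u32 shift) {
    // Shifts of 32 or more yield 0 on Maxwell, but are undefined in C++ and SPIR-V
    return shift < 32 ? base << shift : 0u;
}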
insert, Id offset, Id count) { + return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); } -void EmitBitFieldSExtract(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count) { + return ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count); } Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index c2ca0873b..18561bc9c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -17,9 +17,6 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s BitField<8, 8, IR::Reg> src_a; } const ffma{insn}; - if (sat) { - throw NotImplementedException("FFMA SAT"); - } if (cc) { throw NotImplementedException("FFMA CC"); } @@ -31,7 +28,20 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); } void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { @@ -54,20 +64,31 @@ void TranslatorVisitor::FFMA_reg(u64 insn) { FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_rc(u64) { - throw NotImplementedException("FFMA (rc)"); +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); } void TranslatorVisitor::FFMA_cr(u64 insn) { FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_imm(u64) { - throw NotImplementedException("FFMA (imm)"); +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA32I(u64) { - throw NotImplementedException("FFMA32I"); +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index edf2cadae..72f0a18ae 100644 --- 
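Mirroring FMUL, a reference model of FMZ for the fused case; note the collapse is to c rather than to zero, which is why the Select above picks op_c (FmzFma is illustrative, not emitted code):

#include <cmath>

float FmzFma(float a, float b, float c) {
    // If either factor is zero, a*b is forced to zero and only the addend remains
    if (a == 0.0f || b == 0.0f) {
        return c;
    }
    return std::fma(a, b, c);
}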
a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -94,6 +94,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; } const fmul{insn}; + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } @@ -118,6 +119,7 @@ void TranslatorVisitor::FMUL32I(u64 insn) { BitField<53, 2, FmzMode> fmz; BitField<55, 1, u64> sat; } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, fmul32i.sat != 0, fmul32i.cc != 0, false); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp index 70a7c76c5..2932cdc42 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -58,7 +58,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s case SelectMode::CHI: return ExtractHalf(v, src_c, Half::H1, false); case SelectMode::CBCC: - return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); case SelectMode::CSFU: throw NotImplementedException("XMAD CSFU"); } @@ -78,16 +78,44 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } } // Anonymous namespace -void TranslatorVisitor::XMAD_reg(u64) { - throw NotImplementedException("XMAD (reg)"); +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } -void TranslatorVisitor::XMAD_rc(u64) { - throw NotImplementedException("XMAD (rc)"); +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); } -void TranslatorVisitor::XMAD_cr(u64) { - throw NotImplementedException("XMAD (cr)"); +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } void TranslatorVisitor::XMAD_imm(u64 insn) { @@ -97,14 +125,11 @@ void TranslatorVisitor::XMAD_imm(u64 insn) { BitField<36, 1, u64> psl; BitField<37, 1, u64> mrg; BitField<38, 1, u64> x; - BitField<39, 8, IR::Reg> src_c; BitField<50, 3, SelectMode> select_mode; } const xmad{insn}; - const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; - const IR::U32 src_c{X(xmad.src_c)}; - XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, - xmad.x != 0); + XMAD(*this, insn, ir.Imm32(static_cast(xmad.src_b)), GetReg39(insn), xmad.select_mode, + 
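The CBCC fix above changes which operand rides in the low half of the addend; a simplified reference model of that select mode (XmadCbcc is an illustrative helper):

u32 XmadCbcc(u32 half_a, u32 half_b, u32 src_b, u32 src_c) {
    // 16x16 -> 32-bit multiply of the selected halves, plus the CBCC addend
    const u32 product{(half_a & 0xFFFF) * (half_b & 0xFFFF)};
    return product + ((src_b << 16) + src_c); // the bug had added src_b twice
}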
Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); } } // namespace Shader::Maxwell From e87a502da2d5a8356a639d53c0a16a77890de4c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 05:21:30 -0300 Subject: [PATCH 032/770] shader: Fix control flow --- .../frontend/ir/ir_emitter.cpp | 27 ++++++++++++------- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- .../frontend/ir/structured_control_flow.cpp | 15 +++++++---- src/shader_recompiler/frontend/ir/value.cpp | 6 ++++- src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/control_flow.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../translate/impl/not_implemented.cpp | 4 +-- 8 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8ba86e614..0209d5540 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -134,18 +134,27 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U1 IREmitter::Condition(IR::Condition cond) { - if (cond == IR::Condition{true}) { - return Imm1(true); - } else if (cond == IR::Condition{false}) { - return Imm1(false); +static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { + switch (flow_test) { + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::F: + return ir.Imm1(false); + case FlowTest::EQ: + // TODO: Test this + return ir.GetZFlag(); + case FlowTest::NE: + // TODO: Test this + return ir.LogicalNot(ir.GetZFlag()); + default: + throw NotImplementedException("Flow test {}", flow_test); } +} + +U1 IREmitter::Condition(IR::Condition cond) { const FlowTest flow_test{cond.FlowTest()}; const auto [pred, is_negated]{cond.Pred()}; - if (flow_test == FlowTest::T) { - return GetPred(pred, is_negated); - } - throw NotImplementedException("Condition {}", cond); + return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } F32 IREmitter::GetAttribute(IR::Attribute attribute) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f2d71144a..289e76f32 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -4,8 +4,8 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... 
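A usage sketch of the generalized Condition; since the EQ/NE mappings above are marked TODO, treat the Z-flag reading as provisional:

// Conditional on "!P0 && NE": now lowers instead of throwing
const IR::U1 cond{ir.Condition(IR::Condition{IR::FlowTest::NE, IR::Pred::P0, true})};
// expands to LogicalAnd(LogicalNot(GetPred(P0)), LogicalNot(GetZFlag()))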
OPCODE(Phi, Opaque, ) -OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow OPCODE(Branch, Void, Label, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index 032ac8fda..bfba55a7e 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -329,7 +329,6 @@ private: if (!sibling) { throw LogicError("Not siblings"); } - // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it @@ -351,9 +350,14 @@ private: const std::unordered_map labels_map{BuildLabels(blocks)}; Tree& root{root_stmt.children}; auto insert_point{root.begin()}; + // Skip all goto variables zero-initialization + std::advance(insert_point, labels_map.size()); + for (Block* const block : blocks) { - ++insert_point; // Skip label - ++insert_point; // Skip set variable + // Skip label + ++insert_point; + // Skip set variable + ++insert_point; root.insert(insert_point, *pool.Create(block, &root_stmt)); if (block->IsTerminationBlock()) { @@ -391,6 +395,7 @@ private: labels_map.emplace(block, root.insert(root.end(), *label)); Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); ++label_id; } return labels_map; @@ -457,10 +462,10 @@ private: } body.erase(goto_stmt); - // Update nested if condition switch (label_nested_stmt->type) { case StatementType::If: - label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); break; case StatementType::Loop: break; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 718314213..791ba2690 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -36,6 +36,10 @@ bool Value::IsIdentity() const noexcept { return type == Type::Opaque && inst->Opcode() == Opcode::Identity; } +bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->Opcode() == Opcode::Phi; +} + bool Value::IsEmpty() const noexcept { return type == Type::Void; } @@ -52,7 +56,7 @@ bool Value::IsLabel() const noexcept { } IR::Type Value::Type() const noexcept { - if (IsIdentity()) { + if (IsIdentity() || IsPhi()) { return inst->Arg(0).Type(); } if (type == Type::Opaque) { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8aba0bbf6..9b7e1480b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -33,6 +33,7 @@ public: explicit Value(f64 value) noexcept; [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e766b555b..52be41b84 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -354,7 +354,7 @@ 
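The advance/push_front pairing above produces this root-list shape (illustration only; the goto_L* names are invented):

// goto_L0 = false;  goto_L1 = false;  ...   <- zero-inits, skipped via std::advance
// L0:  goto_L0 = false;  <block 0 statements>
// L1:  goto_L1 = false;  <block 1 statements>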
void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4e722e205..672e140b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -209,7 +209,7 @@ public: void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); void PBK(); - void PCNT(u64 insn); + void PCNT(); void PEXIT(u64 insn); void PIXLD(u64 insn); void PLONGJMP(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4114e10be..3f6dedfdd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -637,8 +637,8 @@ void TranslatorVisitor::PBK() { // PBK is a no-op } -void TranslatorVisitor::PCNT(u64) { - ThrowNotImplemented(Opcode::PCNT); +void TranslatorVisitor::PCNT() { + // PCNT is a no-op } void TranslatorVisitor::PEXIT(u64) { From 7496bbf7584049fb9c99cf705e9cc16aee61a55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:31:32 -0300 Subject: [PATCH 033/770] spirv: Add support for self-referencing phi nodes --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index f3aca90d0..bcd6bda28 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -217,9 +217,16 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { IR::Inst* const arg_inst{arg.Inst()}; def = arg_inst->Definition(); if (!Sirit::ValidId(def)) { - // If it hasn't been defined, get a forward declaration - def = ctx.ForwardDeclarationId(); - arg_inst->SetDefinition(def); + if (arg_inst == inst) { + // This is a self referencing phi node + def = ctx.CurrentId(); + // Self-referencing definition will be set by the caller + } else { + // If it hasn't been defined and it's not a self reference, + // get a forward declaration + def = ctx.ForwardDeclarationId(); + arg_inst->SetDefinition(def); + } } } IR::Block* const phi_block{inst->PhiBlock(index)}; From 622d676202bad317a58529efc3c15d08fd04aad1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:32:00 -0300 Subject: [PATCH 034/770] shader: Fix conditional execution of exit instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 +++++---- src/shader_recompiler/frontend/maxwell/control_flow.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 52be41b84..d0dc66330 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -322,12 +322,13 @@ 
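Self-referencing phis come from loops; a minimal IR shape showing why an id must be handed out before the instruction finishes emitting (value names illustrative):

// block_loop:
//   %i = Phi [ %zero, block_entry ], [ %i, block_loop ]
// Emitting %i's operand list needs %i's own id, i.e. ctx.CurrentId(), because
// the definition is only recorded after the OpPhi itself has been written.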
CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond) { + EndClass insn_end_class, IR::Condition cond, + bool visit_conditional_inst) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -354,7 +355,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc + 1; + conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; @@ -423,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 8179787b8..209c9e551 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -128,7 +128,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond); + IR::Condition cond, bool visit_conditional_inst); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, From 3bc857f2f34b2959a545d3b4e26f27ca9751f788 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:33:07 -0300 Subject: [PATCH 035/770] shader: Avoid infinite recursion when tracking global memory --- .../global_memory_to_storage_buffer_pass.cpp | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 03bd547b7..98e3dfef7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -43,6 +43,8 @@ using StorageBufferSet = boost::container::flat_set, boost::container::small_vector>; using StorageInstVector = boost::container::small_vector; +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -194,7 +196,8 @@ std::optional TrackLowAddress(IR::Inst* inst) { } /// 
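Why visit_conditional_inst differs per end class, as a worked trace (addresses illustrative):

// @P0 BRA  at pc: the branch is consumed by the block, so the body ends at pc + 1
// @P0 EXIT at pc: the EXIT's effect is already the block's EndClass::Exit, so the
//                 conditional body ends at pc and the opcode is not re-translated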
Recursively tries to track the storage buffer address used by a global memory instruction -std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { +std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, const Bias* bias, + VisitedBlocks& visited) { if (value.IsImmediate()) { // Immediates can't be a storage buffer return std::nullopt; } @@ -223,8 +226,24 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { - if (const std::optional<StorageBufferAddr> storage_buffer{Track(inst->Arg(arg), bias)}) { - return *storage_buffer; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // Already visited, skip + continue; + } + const std::optional<StorageBufferAddr> storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } else { + const std::optional<StorageBufferAddr> storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; + } } } return std::nullopt; @@ -248,10 +267,12 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; + VisitedBlocks visited_blocks; + std::optional<StorageBufferAddr> storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)}; if (!storage_buffer) { // If it fails, track without a bias - storage_buffer = Track(low_addr, nullptr); + visited_blocks.clear(); + storage_buffer = Track(&block, low_addr, nullptr, visited_blocks); if (!storage_buffer) { // If that also failed, drop the global memory usage DiscardGlobalMemory(block, inst); From 726625cf5057157fb5e4c9c210676930ff520dd2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 24 Feb 2021 18:37:47 -0300 Subject: [PATCH 036/770] spirv: Move phi arguments emit to a separate function --- .../backend/spirv/emit_spirv.cpp | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index bcd6bda28..8097fe82d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -163,6 +163,31 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } } } + +Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { + // Phi nodes can have forward declarations, if an argument is not defined provide a forward + // declaration of it. Invoke will take care of giving it the right definition when it's + // actually defined.
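The fix above is the usual cycle guard for walking a value graph that may loop through phi nodes: record what has already been entered and refuse to revisit it. A self-contained sketch of the same idea (hypothetical Node type, illustrative only, not the pass's real types):

#include <optional>
#include <unordered_set>
#include <vector>

struct Node {
    int payload{};              // non-zero stands in for "found the address"
    std::vector<Node*> inputs;  // may contain cycles through phi-like nodes
};

std::optional<int> TrackPayload(Node* node, std::unordered_set<Node*>& visited) {
    if (!visited.insert(node).second) {
        return std::nullopt;    // already visited: break the cycle
    }
    if (node->payload != 0) {
        return node->payload;
    }
    for (Node* input : node->inputs) {
        if (const std::optional<int> result{TrackPayload(input, visited)}) {
            return result;
        }
    }
    return std::nullopt;
}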
+ const IR::Value arg{inst->Arg(index)}; + if (arg.IsImmediate()) { + // Let the context handle immediate definitions, as it already knows how + return ctx.Def(arg); + } + IR::Inst* const arg_inst{arg.Inst()}; + if (const Id def{arg_inst->Definition()}; Sirit::ValidId(def)) { + // Return the current definition if it exists + return def; + } + if (arg_inst == inst) { + // This is a self referencing phi node + // Self-referencing definition will be set by the caller, so just grab the current id + return ctx.CurrentId(); + } + // If it hasn't been defined and it's not a self reference, get a forward declaration + const Id def{ctx.ForwardDeclarationId()}; + arg_inst->SetDefinition(def); + return def; +} } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { @@ -205,33 +230,8 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { boost::container::small_vector operands; operands.reserve(num_args * 2); for (size_t index = 0; index < num_args; ++index) { - // Phi nodes can have forward declarations, if an argument is not defined provide a forward - // declaration of it. Invoke will take care of giving it the right definition when it's - // actually defined. - const IR::Value arg{inst->Arg(index)}; - Id def{}; - if (arg.IsImmediate()) { - // Let the context handle immediate definitions, as it already knows how - def = ctx.Def(arg); - } else { - IR::Inst* const arg_inst{arg.Inst()}; - def = arg_inst->Definition(); - if (!Sirit::ValidId(def)) { - if (arg_inst == inst) { - // This is a self referencing phi node - def = ctx.CurrentId(); - // Self-referencing definition will be set by the caller - } else { - // If it hasn't been defined and it's not a self reference, - // get a forward declaration - def = ctx.ForwardDeclarationId(); - arg_inst->SetDefinition(def); - } - } - } - IR::Block* const phi_block{inst->PhiBlock(index)}; - operands.push_back(def); - operands.push_back(phi_block->Definition()); + operands.push_back(PhiArgDef(ctx, inst, index)); + operands.push_back(inst->PhiBlock(index)->Definition()); } const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); From 8810c88b7e3de2766bf47e07e941fb2c58c6b4b0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 24 Feb 2021 20:31:15 -0500 Subject: [PATCH 037/770] shader: Implement SEL --- src/shader_recompiler/CMakeLists.txt | 1 + .../translate/impl/not_implemented.cpp | 12 ----- .../impl/select_source_with_predicate.cpp | 44 +++++++++++++++++++ .../ir_opt/ssa_rewrite_pass.cpp | 12 +++-- 4 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 5574feaa6..17ccb3d98 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -79,6 +79,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3f6dedfdd..82c73bf8c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -729,18 +729,6 @@ void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } -void TranslatorVisitor::SEL_reg(u64) { - ThrowNotImplemented(Opcode::SEL_reg); -} - -void TranslatorVisitor::SEL_cbuf(u64) { - ThrowNotImplemented(Opcode::SEL_cbuf); -} - -void TranslatorVisitor::SEL_imm(u64) { - ThrowNotImplemented(Opcode::SEL_imm); -} - void TranslatorVisitor::SETCRSPTR(u64) { ThrowNotImplemented(Opcode::SETCRSPTR); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..25fc6b437 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 13f9c914a..19d35b1f8 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -109,11 +109,13 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { class Pass { public: - void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + template + void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { current_def[variable].insert_or_assign(block, value); } - IR::Value ReadVariable(auto variable, IR::Block* block) { + template + IR::Value ReadVariable(Type variable, IR::Block* block) { const ValueMap& def{current_def[variable]}; if (const auto it{def.find(block)}; it != def.end()) { return it->second; @@ -132,7 +134,8 @@ public: } private: - IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + template + IR::Value ReadVariableRecursive(Type variable, IR::Block* block) { IR::Value val; if (!sealed_blocks.contains(block)) { // Incomplete CFG @@ -154,7 +157,8 @@ private: return val; } - IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { + template + IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { 
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } From cc55d289494c991e7e0e456e428a110569708c2e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Feb 2021 00:46:40 -0500 Subject: [PATCH 038/770] shader: Implement SHR --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 5 +- .../backend/spirv/emit_spirv_integer.cpp | 12 ++-- .../frontend/ir/ir_emitter.cpp | 4 ++ .../frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../translate/impl/integer_shift_right.cpp | 62 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 17ccb3d98..14dc51b5f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -72,6 +72,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_scaled_add.cpp frontend/maxwell/translate/impl/integer_set_predicate.cpp frontend/maxwell/translate/impl/integer_shift_left.cpp + frontend/maxwell/translate/impl/integer_shift_right.cpp frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4b74cf04d..90afbcc90 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -219,14 +219,15 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -void EmitShiftRightLogical32(EmitContext& ctx); -void EmitShiftRightArithmetic32(EmitContext& ctx); +Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 8aaa0e381..406df1b78 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -70,12 +70,12 @@ Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitShiftRightLogical32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b) { + return ctx.OpShiftRightLogical(ctx.U32[1], a, b); } -void EmitShiftRightArithmetic32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitShiftRightArithmetic32(EmitContext& ctx, 
Id a, Id b) { + return ctx.OpShiftRightArithmetic(ctx.U32[1], a, b); } Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) { @@ -102,6 +102,10 @@ Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } +Id EmitBitReverse32(EmitContext& ctx, Id value) { + return ctx.OpBitReverse(ctx.U32[1], value); +} + Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0209d5540..7c3908398 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -804,6 +804,10 @@ U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& co count); } +U32 IREmitter::BitReverse(const U32& value) { + return Inst(Opcode::BitReverse32, value); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2c923716a..f7998e156 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -159,6 +159,7 @@ public: const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed); + [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 289e76f32..f420f1161 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -231,6 +231,7 @@ OPCODE(BitwiseXor32, U32, U32, OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..a34ccb851 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<48, 1, u64> is_arithmetic; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? 
shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_arithmetic == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 82c73bf8c..45ed04e25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -757,18 +757,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHR_reg(u64) { - ThrowNotImplemented(Opcode::SHR_reg); -} - -void TranslatorVisitor::SHR_cbuf(u64) { - ThrowNotImplemented(Opcode::SHR_cbuf); -} - -void TranslatorVisitor::SHR_imm(u64) { - ThrowNotImplemented(Opcode::SHR_imm); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } From a8c41c50d3f7a1c2871487862f68925db8b5e27f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 26 Feb 2021 21:41:46 -0500 Subject: [PATCH 039/770] shader: Implement POPC --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 2 ++ .../backend/spirv/emit_spirv_integer.cpp | 8 +++++ .../frontend/ir/ir_emitter.cpp | 8 +++++ .../frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../translate/impl/integer_popcount.cpp | 36 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ------- 8 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 14dc51b5f..b4d2dcc42 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -69,6 +69,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_scaled_add.cpp frontend/maxwell/translate/impl/integer_set_predicate.cpp frontend/maxwell/translate/impl/integer_shift_left.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 90afbcc90..64c8e9ef6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -228,6 +228,8 @@ Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, Id 
base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id a); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 406df1b78..e49ca7bde 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -106,6 +106,14 @@ Id EmitBitReverse32(EmitContext& ctx, Id value) { return ctx.OpBitReverse(ctx.U32[1], value); } +Id EmitBitCount32(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U32[1], value); +} + +Id EmitBitwiseNot32(EmitContext& ctx, Id a) { + return ctx.OpNot(ctx.U32[1], a); +} + Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7c3908398..54fdf9559 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -808,6 +808,14 @@ U32 IREmitter::BitReverse(const U32& value) { return Inst(Opcode::BitReverse32, value); } +U32 IREmitter::BitCount(const U32& value) { + return Inst(Opcode::BitCount32, value); +} + +U32 IREmitter::BitwiseNot(const U32& a) { + return Inst(Opcode::BitwiseNot32, a); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index f7998e156..9dec22145 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -160,6 +160,8 @@ public: [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed); [[nodiscard]] U32 BitReverse(const U32& value); + [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitwiseNot(const U32& a); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f420f1161..59a13e911 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -232,6 +232,8 @@ OPCODE(BitFieldInsert, U32, U32, OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
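For reference, the plumbing above reduces POPC to "optionally complement, then population count"; a host-side mirror of that behaviour (illustrative sketch, not part of the patch):

#include <bit>
#include <cstdint>

std::uint32_t ReferencePopc(std::uint32_t src, bool invert_source) {
    const std::uint32_t operand = invert_source ? ~src : src;  // the ~ (tilde) bit
    return static_cast<std::uint32_t>(std::popcount(operand));
}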
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 45ed04e25..127686b43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -653,18 +653,6 @@ void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } -void TranslatorVisitor::POPC_reg(u64) { - ThrowNotImplemented(Opcode::POPC_reg); -} - -void TranslatorVisitor::POPC_cbuf(u64) { - ThrowNotImplemented(Opcode::POPC_cbuf); -} - -void TranslatorVisitor::POPC_imm(u64) { - ThrowNotImplemented(Opcode::POPC_imm); -} - void TranslatorVisitor::PRET(u64) { ThrowNotImplemented(Opcode::PRET); } From 34ac9b4d7e71198503d7fca88c0494e1f97701e7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 01:45:13 -0500 Subject: [PATCH 040/770] shader: Implement BFE --- src/shader_recompiler/CMakeLists.txt | 1 + .../translate/impl/bitfield_extract.cpp | 66 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ---- 3 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b4d2dcc42..8be2d353f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -59,6 +59,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h + frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..4a03e6939 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
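Before the BFE translation below, it helps to see the edge cases it must honor in plain C++. A host-side sketch of the signed path (illustrative only; the out-of-range behaviour is modelled after the Select chain in the patch, not vendor documentation):

#include <algorithm>
#include <cstdint>

std::int32_t ReferenceBfeSigned(std::uint32_t base, std::uint32_t src) {
    const std::uint32_t offset = src & 0xffu;        // bits [7:0] of the second source
    const std::uint32_t count = (src >> 8) & 0xffu;  // bits [15:8]
    if (count == 0) {
        return 0;                                     // zero-width field
    }
    if (offset >= 32) {
        return static_cast<std::int32_t>(base) >> 31; // replicate the sign bit
    }
    const std::uint32_t safe_count = std::min<std::uint32_t>(count, 32 - offset);
    const std::uint32_t top = 32 - offset - safe_count;
    std::int32_t result = static_cast<std::int32_t>(base << top) >> (top + offset);
    if (offset + count >= 32) {
        // Exceeding extraction forces the top result bit to the source's sign bit
        result = (result & 0x7fffffff) |
                 static_cast<std::int32_t>(base & 0x80000000u);
    }
    return result;
}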
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } + // Zero count condition + result = IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 127686b43..3714f5f4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,18 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFE_reg(u64) { - ThrowNotImplemented(Opcode::BFE_reg); -} - -void TranslatorVisitor::BFE_cbuf(u64) { - ThrowNotImplemented(Opcode::BFE_cbuf); -} - -void TranslatorVisitor::BFE_imm(u64) { - ThrowNotImplemented(Opcode::BFE_imm); -} - void TranslatorVisitor::BFI_reg(u64) { ThrowNotImplemented(Opcode::BFI_reg); } From 08a9e95905fa90d9d2455c9aedf66cebcfc6f6ba Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 16:37:49 -0500 Subject: [PATCH 041/770] shader: Implement BFI --- src/shader_recompiler/CMakeLists.txt | 1 + .../translate/impl/bitfield_insert.cpp | 56 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 16 ------ 3 files changed, 57 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 
8be2d353f..2e5de7f95 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -60,6 +60,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/program.cpp frontend/maxwell/program.h frontend/maxwell/translate/impl/bitfield_extract.cpp + frontend/maxwell/translate/impl/bitfield_insert.cpp frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..ee312c30d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + } const bfi{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + result = IR::U32{v.ir.Select(zero_offset, base, result)}; + + v.X(bfi.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3714f5f4f..ed2cfac60 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,22 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFI_reg(u64) { - ThrowNotImplemented(Opcode::BFI_reg); -} - -void TranslatorVisitor::BFI_rc(u64) { - ThrowNotImplemented(Opcode::BFI_rc); -} - -void TranslatorVisitor::BFI_cr(u64) { - ThrowNotImplemented(Opcode::BFI_cr); -} - -void TranslatorVisitor::BFI_imm(u64) { - ThrowNotImplemented(Opcode::BFI_imm); -} - void TranslatorVisitor::BPT(u64) { 
ThrowNotImplemented(Opcode::BPT); } From 20390c0548d6eef2af67a363ee120a630267b741 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Feb 2021 23:33:53 -0500 Subject: [PATCH 042/770] shader: Implement IMNMX --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_integer.cpp | 16 +++++ .../frontend/ir/ir_emitter.cpp | 16 +++++ .../frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 4 ++ .../impl/integer_minimum_maximum.cpp | 59 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 105 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 2e5de7f95..e0568a058 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_scaled_add.cpp frontend/maxwell/translate/impl/integer_set_predicate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 64c8e9ef6..4d00b235d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -230,6 +230,10 @@ Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id a); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index e49ca7bde..5bdd943a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -114,6 +114,22 @@ Id EmitBitwiseNot32(EmitContext& ctx, Id a) { return ctx.OpNot(ctx.U32[1], a); } +Id EmitSMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSMin(ctx.U32[1], a, b); +} + +Id EmitUMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMin(ctx.U32[1], a, b); +} + +Id EmitSMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpSMax(ctx.U32[1], a, b); +} + +Id EmitUMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpUMax(ctx.U32[1], a, b); +} + Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 54fdf9559..04edcdfd8 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -816,6 +816,22 @@ U32 IREmitter::BitwiseNot(const U32& a) { return Inst(Opcode::BitwiseNot32, a); } +U32 IREmitter::SMin(const U32& a, const U32& b) { + return Inst(Opcode::SMin32, a, b); +} + +U32 
IREmitter::UMin(const U32& a, const U32& b) { + return Inst(Opcode::UMin32, a, b); +} + +U32 IREmitter::SMax(const U32& a, const U32& b) { + return Inst(Opcode::SMax32, a, b); +} + +U32 IREmitter::UMax(const U32& a, const U32& b) { + return Inst(Opcode::UMax32, a, b); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 9dec22145..00ba2e4cd 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -163,6 +163,11 @@ public: [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& a); + [[nodiscard]] U32 SMin(const U32& a, const U32& b); + [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 SMax(const U32& a, const U32& b); + [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 59a13e911..2c4a997dc 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -235,6 +235,10 @@ OPCODE(BitReverse32, U32, U32, OPCODE(BitCount32, U32, U32, ) OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) OPCODE(IEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..12c6aae3d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
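The selection pattern the IMNMX translation below builds out of these new opcodes is simply a predicate-chosen min/max; a host-side mirror (illustrative, not part of the patch):

#include <algorithm>
#include <cstdint>

// The predicate picks the minimum when true and the maximum when false;
// a negated predicate is handled by swapping the two results.
std::uint32_t ReferenceImnmxU32(std::uint32_t a, std::uint32_t b, bool pred) {
    return pred ? std::min(a, b) : std::max(a, b);
}

std::int32_t ReferenceImnmxS32(std::int32_t a, std::int32_t b, bool pred) {
    return pred ? std::min(a, b) : std::max(a, b);
}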
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + IR::U1 pred = v.ir.GetPred(imnmx.pred); + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ed2cfac60..615e3c3b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -453,18 +453,6 @@ void TranslatorVisitor::IMADSP_imm(u64) { ThrowNotImplemented(Opcode::IMADSP_imm); } -void TranslatorVisitor::IMNMX_reg(u64) { - ThrowNotImplemented(Opcode::IMNMX_reg); -} - -void TranslatorVisitor::IMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::IMNMX_cbuf); -} - -void TranslatorVisitor::IMNMX_imm(u64) { - ThrowNotImplemented(Opcode::IMNMX_imm); -} - void TranslatorVisitor::IMUL_reg(u64) { ThrowNotImplemented(Opcode::IMUL_reg); } From bce0b1dcca4e83ab8bb6692e98d021ded5c0ad5f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:18:30 -0500 Subject: [PATCH 043/770] shader: Implement ICMP --- src/shader_recompiler/CMakeLists.txt | 1 + .../translate/impl/integer_compare.cpp | 83 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 16 ---- 3 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index e0568a058..a227ce21b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_scaled_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..1f604b0ee --- /dev/null +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, + ComparisonOp compare_op, bool is_signed) { + const IR::U32 zero{v.ir.Imm32(0)}; + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand, zero, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand, zero); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand, zero, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand, zero, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand, zero); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand, zero, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("ICMP.CMP"); + } +} + +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 615e3c3b5..8d4044ee8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -389,22 +389,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::ICMP_reg(u64) { - ThrowNotImplemented(Opcode::ICMP_reg); -} - -void TranslatorVisitor::ICMP_rc(u64) { - ThrowNotImplemented(Opcode::ICMP_rc); -} - -void TranslatorVisitor::ICMP_cr(u64) { - ThrowNotImplemented(Opcode::ICMP_cr); -} - -void TranslatorVisitor::ICMP_imm(u64) { - ThrowNotImplemented(Opcode::ICMP_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } From bec7d3111d3de2a7a8384b1e761bc3692afef9c7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:25:15 -0500 Subject: [PATCH 044/770] shader: Make IMNMX, 
SHR, SEL stylistically more consistent --- .../maxwell/translate/impl/integer_minimum_maximum.cpp | 2 +- .../frontend/maxwell/translate/impl/integer_shift_right.cpp | 4 ++-- .../maxwell/translate/impl/select_source_with_predicate.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 12c6aae3d..5303db612 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred = v.ir.GetPred(imnmx.pred); + IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index a34ccb851..4025b1358 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,7 +16,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; - BitField<48, 1, u64> is_arithmetic; + BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { @@ -29,7 +29,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { } IR::U32 result; const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); - if (shr.is_arithmetic == 1) { + if (shr.is_signed == 1) { result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; } else { result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp index 25fc6b437..93baa75a9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -13,13 +13,13 @@ void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> op_a; + BitField<8, 8, IR::Reg> src_reg; BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; } const sel{insn}; const IR::U1 pred = v.ir.GetPred(sel.pred); - IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_a{v.X(sel.src_reg)}; IR::U32 op_b{src}; if (sel.neg_pred != 0) { std::swap(op_a, op_b); From e0389286165805258fa2e54014c2dc506ffb9f35 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 01:30:09 -0500 Subject: [PATCH 045/770] shader: Implement ISET, add common_funcs --- src/shader_recompiler/CMakeLists.txt | 3 + .../maxwell/translate/impl/common_funcs.cpp | 46 ++++++++++++++ .../maxwell/translate/impl/common_funcs.h | 17 +++++ .../frontend/maxwell/translate/impl/impl.h | 19 ++++++ .../translate/impl/integer_compare.cpp | 39 +----------- .../impl/integer_compare_and_set.cpp | 62 +++++++++++++++++++ .../impl/integer_minimum_maximum.cpp | 2 +- .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 
150 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index a227ce21b..89fc24f62 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -62,6 +62,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp frontend/maxwell/translate/impl/common_encoding.h + frontend/maxwell/translate/impl/common_funcs.cpp + frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -72,6 +74,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp frontend/maxwell/translate/impl/integer_compare.cpp + frontend/maxwell/translate/impl/integer_compare_and_set.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_scaled_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..3ec146b1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
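A note on the SET-family result convention the new ISET translation below relies on (host-side sketch, illustrative only): a passing test writes an all-ones integer mask, or the IEEE-754 bit pattern of 1.0f when the BF bit requests a float-style result.

#include <cstdint>

std::uint32_t ReferenceSetResult(bool pass, bool bf) {
    if (!pass) {
        return 0;                           // fail: all zeros either way
    }
    return bf ? 0x3f800000u : 0xffffffffu;  // 1.0f bits vs. ~0 mask
}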
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed) { + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand_1, operand_2, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand_1, operand_2); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand_1, operand_2, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand_1, operand_2); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("CMP"); + } +} + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop) { + switch (bop) { + case BooleanOp::And: + return v.ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::Or: + return v.ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::Xor: + return v.ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("BOP"); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..293fcce2e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,17 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 672e140b2..232f8c894 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
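PredicateCombine above is small enough to mirror exactly on the host, which is handy when sanity-checking translated predicates (illustrative sketch with a stand-in enum, not part of the patch):

#include <stdexcept>

enum class RefBooleanOp { And, Or, Xor };

bool ReferencePredicateCombine(bool a, bool b, RefBooleanOp bop) {
    switch (bop) {
    case RefBooleanOp::And:
        return a && b;
    case RefBooleanOp::Or:
        return a || b;
    case RefBooleanOp::Xor:
        return a != b;  // XOR of booleans is inequality
    }
    throw std::logic_error{"invalid boolean op"};
}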
+#pragma once + #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -9,6 +11,23 @@ namespace Shader::Maxwell { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + And, + Or, + Xor, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index 1f604b0ee..d844974d8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -4,46 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class ComparisonOp : u64 { - False, - LessThan, - Equal, - LessThanEqual, - GreaterThan, - NotEqual, - GreaterThanEqual, - True, -}; - -[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, - ComparisonOp compare_op, bool is_signed) { - const IR::U32 zero{v.ir.Imm32(0)}; - switch (compare_op) { - case ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand, zero, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand, zero); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand, zero, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand, zero, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand, zero); - case ComparisonOp::GreaterThanEqual: - return v.ir.IGreaterThanEqual(operand, zero, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); - default: - throw NotImplementedException("ICMP.CMP"); - } -} - void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { union { u64 insn; @@ -55,7 +20,7 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..b6a7b593d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
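The refactored ICMP above keeps its original semantics: the third operand is compared against zero and the comparison selects between the two sources. A host-side mirror for the signed less-than case (illustrative, not part of the patch):

#include <cstdint>

std::uint32_t ReferenceIcmpLessThan(std::uint32_t src_reg, std::uint32_t src_a,
                                    std::int32_t operand) {
    return operand < 0 ? src_reg : src_a;  // Select(cmp, src_reg, src_a)
}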
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const iset{insn}; + + if (iset.x != 0) { + throw NotImplementedException("ISET.X"); + } + + const IR::U32 src_reg{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; + + const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + + v.X(iset.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 5303db612..40f14ab8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 8d4044ee8..f327e6fa5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -457,18 +457,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISET_reg(u64) { - ThrowNotImplemented(Opcode::ISET_reg); -} - -void TranslatorVisitor::ISET_cbuf(u64) { - ThrowNotImplemented(Opcode::ISET_cbuf); -} - -void TranslatorVisitor::ISET_imm(u64) { - ThrowNotImplemented(Opcode::ISET_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } From 103b9da4f7115ff47eee52d0dbd31b5b7a18b257 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 15:58:16 -0500 Subject: [PATCH 046/770] shader: Implement FLO --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_integer.cpp | 12 ++++- 
.../frontend/ir/ir_emitter.cpp | 12 ++++-
.../frontend/ir/ir_emitter.h | 4 +-
src/shader_recompiler/frontend/ir/opcodes.inc | 2 +
.../translate/impl/find_leading_one.cpp | 46 +++++++++++++++++++
.../translate/impl/not_implemented.cpp | 12 -----
8 files changed, 75 insertions(+), 18 deletions(-)
create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 89fc24f62..035fd34e2 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -64,6 +64,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/common_encoding.h
     frontend/maxwell/translate/impl/common_funcs.cpp
     frontend/maxwell/translate/impl/common_funcs.h
+    frontend/maxwell/translate/impl/find_leading_one.cpp
     frontend/maxwell/translate/impl/floating_point_add.cpp
     frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
     frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 4d00b235d..5446d6ab6 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -229,7 +229,9 @@ Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count);
 Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count);
 Id EmitBitReverse32(EmitContext& ctx, Id value);
 Id EmitBitCount32(EmitContext& ctx, Id value);
-Id EmitBitwiseNot32(EmitContext& ctx, Id a);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
 Id EmitSMin32(EmitContext& ctx, Id a, Id b);
 Id EmitUMin32(EmitContext& ctx, Id a, Id b);
 Id EmitSMax32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 5bdd943a4..162fb6a91 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -110,8 +110,16 @@ Id EmitBitCount32(EmitContext& ctx, Id value) {
     return ctx.OpBitCount(ctx.U32[1], value);
 }
-Id EmitBitwiseNot32(EmitContext& ctx, Id a) {
-    return ctx.OpNot(ctx.U32[1], a);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
+    return ctx.OpNot(ctx.U32[1], value);
+}
+
+Id EmitFindSMsb32(EmitContext& ctx, Id value) {
+    return ctx.OpFindSMsb(ctx.U32[1], value);
+}
+
+Id EmitFindUMsb32(EmitContext& ctx, Id value) {
+    return ctx.OpFindUMsb(ctx.U32[1], value);
 }
 Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 04edcdfd8..0f1cab57a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -812,8 +812,16 @@ U32 IREmitter::BitCount(const U32& value) {
     return Inst<U32>(Opcode::BitCount32, value);
 }
-U32 IREmitter::BitwiseNot(const U32& a) {
-    return Inst<U32>(Opcode::BitwiseNot32, a);
+U32 IREmitter::BitwiseNot(const U32& value) {
+    return Inst<U32>(Opcode::BitwiseNot32, value);
+}
+
+U32 IREmitter::FindSMsb(const U32& value) {
+    return Inst<U32>(Opcode::FindSMsb32, value);
+}
+
+U32 IREmitter::FindUMsb(const U32& value) {
+    return Inst<U32>(Opcode::FindUMsb32, value);
 }
 U32 IREmitter::SMin(const U32& a, const U32& b) {
diff --git
a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 00ba2e4cd..03a67985f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -161,8 +161,10 @@ public: bool is_signed); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); - [[nodiscard]] U32 BitwiseNot(const U32& a); + [[nodiscard]] U32 BitwiseNot(const U32& value); + [[nodiscard]] U32 FindSMsb(const U32& value); + [[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 SMax(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 2c4a997dc..aedbc5c3e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -235,6 +235,8 @@ OPCODE(BitReverse32, U32, U32, OPCODE(BitCount32, U32, U32, ) OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) OPCODE(SMin32, U32, U32, U32, ) OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..d5361bec5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + const bool invert{flo.tilde != 0}; + const bool is_signed{flo.is_signed != 0}; + const bool shift_op{flo.shift != 0}; + + const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src}; + const IR::U32 find_result{is_signed ? v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)}; + const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))}; + const IR::U32 offset{v.ir.Imm32(31)}; + const IR::U32 success_result{shift_op ? 
IR::U32{v.ir.ISub(offset, find_result)} : find_result}; + + const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)}; + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index f327e6fa5..2da0b87c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -217,18 +217,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FLO_reg(u64) { - ThrowNotImplemented(Opcode::FLO_reg); -} - -void TranslatorVisitor::FLO_cbuf(u64) { - ThrowNotImplemented(Opcode::FLO_cbuf); -} - -void TranslatorVisitor::FLO_imm(u64) { - ThrowNotImplemented(Opcode::FLO_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } From ce9b116cfe4fcd96df889ed8997c93c6cd2a502c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 01:05:57 -0500 Subject: [PATCH 047/770] Implement PSET, refactor common comparison funcs --- src/shader_recompiler/CMakeLists.txt | 1 + .../maxwell/translate/impl/common_funcs.cpp | 54 ++++++++-------- .../maxwell/translate/impl/common_funcs.h | 7 +- .../frontend/maxwell/translate/impl/impl.h | 8 +-- .../translate/impl/integer_compare.cpp | 4 +- .../impl/integer_compare_and_set.cpp | 6 +- .../translate/impl/integer_set_predicate.cpp | 64 ++----------------- .../translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/predicate_set.cpp | 41 ++++++++++++ 9 files changed, 88 insertions(+), 101 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 035fd34e2..057857299 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/predicate_set.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 3ec146b1a..62f825a92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,42 +5,42 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed) { switch (compare_op) { - case 
ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand_1, operand_2, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand_1, operand_2); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand_1, operand_2, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand_1, operand_2); - case ComparisonOp::GreaterThanEqual: - return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); default: - throw NotImplementedException("CMP"); + throw NotImplementedException("Invalid compare op {}", compare_op); } } -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { - case BooleanOp::And: - return v.ir.LogicalAnd(predicate_1, predicate_2); - case BooleanOp::Or: - return v.ir.LogicalOr(predicate_1, predicate_2); - case BooleanOp::Xor: - return v.ir.LogicalXor(predicate_1, predicate_2); + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); default: - throw NotImplementedException("BOP"); + throw NotImplementedException("Invalid bop {}", bop); } } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 293fcce2e..61e13fa18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -8,10 +8,9 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, - bool is_signed); +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 232f8c894..ad09ade7c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { -enum class 
ComparisonOp : u64 { +enum class CompareOp : u64 { False, LessThan, Equal, @@ -23,9 +23,9 @@ enum class ComparisonOp : u64 { }; enum class BooleanOp : u64 { - And, - Or, - Xor, + AND, + OR, + XOR, }; class TranslatorVisitor { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index d844974d8..ba6e01926 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -15,12 +15,12 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const icmp{insn}; const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index b6a7b593d..914af010f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -20,7 +20,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const iset{insn}; if (iset.x != 0) { @@ -33,8 +33,8 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; - const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 1bc9ef363..7743701d0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -4,62 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class CompareOp : u64 { - F, // Always false - LT, // Less than - EQ, // Equal - LE, // Less than or equal - GT, // Greater than - NE, // Not equal - GE, // Greater than or equal - T, // Always true -}; - -enum class Bop : u64 { - AND, - OR, - XOR, -}; - -IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, - bool is_signed) { - switch (op) { - case 
CompareOp::F: - return ir.Imm1(false); - case CompareOp::LT: - return ir.ILessThan(lhs, rhs, is_signed); - case CompareOp::EQ: - return ir.IEqual(lhs, rhs); - case CompareOp::LE: - return ir.ILessThanEqual(lhs, rhs, is_signed); - case CompareOp::GT: - return ir.IGreaterThan(lhs, rhs, is_signed); - case CompareOp::NE: - return ir.INotEqual(lhs, rhs); - case CompareOp::GE: - return ir.IGreaterThanEqual(lhs, rhs, is_signed); - case CompareOp::T: - return ir.Imm1(true); - } - throw NotImplementedException("Invalid ISETP compare op {}", op); -} - -IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { - switch (bop) { - case Bop::AND: - return ir.LogicalAnd(comparison, bop_pred); - case Bop::OR: - return ir.LogicalOr(comparison, bop_pred); - case Bop::XOR: - return ir.LogicalXor(comparison, bop_pred); - } - throw NotImplementedException("Invalid ISETP bop {}", bop); -} - void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -68,17 +17,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; - BitField<45, 2, Bop> bop; + BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; - const Bop bop{isetp.bop}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; - const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; - const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; v.ir.SetPred(isetp.dest_pred_a, result_a); v.ir.SetPred(isetp.dest_pred_b, result_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2da0b87c4..291d7a4bc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSET(u64) { - ThrowNotImplemented(Opcode::PSET); -} - void TranslatorVisitor::PSETP(u64) { ThrowNotImplemented(Opcode::PSETP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp new file mode 100644 index 000000000..6c15963fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
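// PSET materializes predicate logic into a register; the translation below
// reduces to (sketch, hypothetical names):
//
//   bool res = (pred_a bop_1 pred_b) bop_2 pred_c;
//   dest     = res ? (bf ? 0x3f800000 : 0xffffffff) : 0;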
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 false_result{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + + X(pset.dest_reg, result); +} + +} // namespace Shader::Maxwell From c2155f04d4220b71432b046694983963036ab6e2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 13:42:09 -0500 Subject: [PATCH 048/770] shader: Implement PSETP --- src/shader_recompiler/CMakeLists.txt | 3 +- .../translate/impl/not_implemented.cpp | 4 -- .../impl/predicate_set_predicate.cpp | 38 +++++++++++++++++++ ...ate_set.cpp => predicate_set_register.cpp} | 0 4 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp rename src/shader_recompiler/frontend/maxwell/translate/impl/{predicate_set.cpp => predicate_set_register.cpp} (100%) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 057857299..981a79e44 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -89,7 +89,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp - frontend/maxwell/translate/impl/predicate_set.cpp + frontend/maxwell/translate/impl/predicate_set_predicate.cpp + frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 291d7a4bc..91a9858c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSETP(u64) { - ThrowNotImplemented(Opcode::PSETP); -} - void TranslatorVisitor::R2B(u64) { ThrowNotImplemented(Opcode::R2B); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 
000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp similarity index 100% rename from src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp rename to src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp From 382cba94ed52f4fae7db437a3056563ba2110e8b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 14:59:28 -0500 Subject: [PATCH 049/770] shader: Implement IADD3 --- src/shader_recompiler/CMakeLists.txt | 1 + .../impl/integer_add_three_input.cpp | 103 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 -- 3 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 981a79e44..8a0f73a4d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -74,6 +74,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..c2dbd7998 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,103 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version 
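// (IADD3 overview: the translation below computes dest = ((a + b) << s) + c,
// where each operand may first be negated or narrowed to one 16-bit half,
// and s is no shift or a 16-bit left/right shift of the intermediate sum.)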
+// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + default: + return value; + } +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::Right: + return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + default: + return value; + } +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + BitField<35, 2, Half> half_a; + BitField<37, 2, Shift> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X"); + } + if (iadd3.cc != 0) { + throw NotImplementedException("IADD3 CC"); + } + + IR::U32 op_a{v.X(iadd3.src_a)}; + op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); + op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); + op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + + IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + lhs = IntegerShift(v.ir, lhs, iadd3.shift); + const IR::U32 result{v.ir.IAdd(lhs, op_c)}; + + v.X(iadd3.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 91a9858c6..c93304a67 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -365,18 +365,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD3_reg(u64) { - ThrowNotImplemented(Opcode::IADD3_reg); -} - -void TranslatorVisitor::IADD3_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD3_cbuf); -} - -void TranslatorVisitor::IADD3_imm(u64) { - ThrowNotImplemented(Opcode::IADD3_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } From 980cafdc27444484a2a2794be5de92ea18de6e27 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 3 Mar 2021 00:41:05 -0500 Subject: [PATCH 050/770] shader: Implement LOP and LOP3 --- src/shader_recompiler/CMakeLists.txt | 2 + 
.../frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/common_funcs.cpp | 25 +++- .../maxwell/translate/impl/common_funcs.h | 2 + .../frontend/maxwell/translate/impl/impl.h | 7 ++ .../translate/impl/logic_operation.cpp | 77 ++++++++++++ .../impl/logic_operation_three_input.cpp | 117 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 24 ---- 8 files changed, 227 insertions(+), 31 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 8a0f73a4d..6506413a8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -86,6 +86,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp + frontend/maxwell/translate/impl/logic_operation.cpp + frontend/maxwell/translate/impl/logic_operation_three_input.cpp frontend/maxwell/translate/impl/move_predicate_to_register.cpp frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1515285bf..5d0b91598 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -178,8 +178,8 @@ INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") -INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----") -INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") INST(LOP32I, "LOP32I", "0000 01-- ---- ----") INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 62f825a92..9d4ac2e36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,9 +5,8 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, - const IR::U32& operand_2, CompareOp compare_op, - bool is_signed) { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { switch (compare_op) { case CompareOp::False: return ir.Imm1(false); @@ -30,8 +29,8 @@ namespace Shader::Maxwell { } } -[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, - const IR::U1& predicate_2, BooleanOp bop) { +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { switch (bop) { case BooleanOp::AND: return ir.LogicalAnd(predicate_1, predicate_2); @@ -43,4 +42,20 @@ namespace Shader::Maxwell { throw NotImplementedException("Invalid bop {}", bop); } } + +IR::U1 PredicateOperation(IR::IREmitter& ir, const 
IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 61e13fa18..c9ae5c500 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -13,4 +13,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ad09ade7c..c6253c40c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -28,6 +28,13 @@ enum class BooleanOp : u64 { XOR, }; +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..e786a388e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,77 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
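// LOP pairs a bitwise ALU result with a derived predicate. In outline
// (hypothetical names), the translation below performs:
//
//   u32  result = bitop(neg_a ? ~a : a, neg_b ? ~b : b);  // AND/OR/XOR/PASS_B
//   bool p      = pred_op(result);  // False/True/Zero (== 0)/NonZero (!= 0)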
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 1, u64> neg_a; + BitField<40, 1, u64> neg_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop{insn}; + + if (lop.x != 0) { + throw NotImplementedException("LOP X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (lop.neg_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (lop.neg_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)}; + const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; + v.X(lop.dest_reg, result); + v.ir.SetPred(lop.pred, pred_result); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..256c47504 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
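// LOP3 evaluates an arbitrary three-input boolean function given as an 8-bit
// truth table: LUT bit (a << 2 | b << 1 | c) is the output for that input
// combination. For example, a LUT of 0xea encodes (a & b) | c, since every
// row with c = 1 plus the two rows with a = b = 1 are set. ApplyLUT below
// expands this as the OR of one bitwise minterm per set LUT bit.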
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop3{insn}; + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c93304a67..a0535f1c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::LOP_reg(u64) { - ThrowNotImplemented(Opcode::LOP_reg); -} - -void TranslatorVisitor::LOP_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP_cbuf); -} - -void TranslatorVisitor::LOP_imm(u64) { - ThrowNotImplemented(Opcode::LOP_imm); -} - -void TranslatorVisitor::LOP3_reg(u64) { - ThrowNotImplemented(Opcode::LOP3_reg); -} - -void TranslatorVisitor::LOP3_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP3_cbuf); -} - -void TranslatorVisitor::LOP3_imm(u64) { - ThrowNotImplemented(Opcode::LOP3_imm); -} - void TranslatorVisitor::LOP32I(u64) { ThrowNotImplemented(Opcode::LOP32I); } From 4006929c986a2e0e52429fe21201a7ad5ca3fea9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Mar 2021 03:07:19 -0300 Subject: [PATCH 051/770] shader: Implement HADD2 --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 28 ++- .../backend/spirv/emit_spirv_composite.cpp | 72 +++++-- .../backend/spirv/emit_spirv_convert.cpp | 16 ++ .../frontend/ir/ir_emitter.cpp | 90 ++++++++- .../frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 17 +- .../impl/half_floating_point_add.cpp | 184 ++++++++++++++++++ .../translate/impl/load_store_memory.cpp | 2 +- .../translate/impl/not_implemented.cpp | 16 -- .../global_memory_to_storage_buffer_pass.cpp | 2 +- .../ir_opt/lower_fp16_to_fp32.cpp | 10 + 12 files changed, 400 insertions(+), 42 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6506413a8..cb73e03fb 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5446d6ab6..bed43c094 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -void EmitCompositeConstructF16x2(EmitContext& ctx); -void EmitCompositeConstructF16x3(EmitContext& ctx); -void EmitCompositeConstructF16x4(EmitContext& ctx); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id 
EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx); -void EmitCompositeConstructF32x3(EmitContext& ctx); -void EmitCompositeConstructF32x4(EmitContext& ctx); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); @@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value); Id EmitConvertU64F64(EmitContext& ctx, Id value); Id EmitConvertU64U32(EmitContext& ctx, Id value); Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index c950854a0..616e63676 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -void EmitCompositeConstructF16x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); } -void EmitCompositeConstructF16x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); } -void 
EmitCompositeConstructF16x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); } Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { @@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeConstructF32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); } -void EmitCompositeConstructF32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); } -void EmitCompositeConstructF32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); } Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { @@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + } // namespace Shader::Backend::SPIRV 
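// Note on the CompositeInsert lowering above: OpCompositeInsert does not
// modify its operand, it returns a new composite with one member replaced,
// which is what the SSA-form IR expects. In SPIR-V pseudo-assembly
// (hypothetical result ids):
//
//   %new = OpCompositeInsert %f32vec2 %object %old 1  ; %old stays valid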
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 76ccaffce..edcc2a1cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) { return ctx.OpUConvert(ctx.U32[1], value); } +Id EmitConvertF16F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F16[1], value); +} + +Id EmitConvertF32F16(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF32F64(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F32[1], value); +} + +Id EmitConvertF64F32(EmitContext& ctx, Id value) { + return ctx.OpFConvert(ctx.F64[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0f1cab57a..186920d8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } Value IREmitter::CompositeExtract(const Value& vector, size_t element) { - const auto read = [&](Opcode opcode, size_t limit) -> Value { + const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { throw InvalidArgument("Out of bounds element {}", element); } return Inst(opcode, vector, Value{static_cast(element)}); - }; + }}; switch (vector.Type()) { case Type::U32x2: return read(Opcode::CompositeExtractU32x2, 2); @@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); @@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) { } Value IREmitter::UnpackFloat2x16(const U32& value) { - return Inst(Opcode::UnpackFloat2x16, value); + return Inst(Opcode::UnpackFloat2x16, value); } F64 IREmitter::PackDouble2x32(const Value& vector) { @@ -968,7 +1005,7 @@ U32U64 
IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
-U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
+U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
     switch (result_bitsize) {
     case 32:
         switch (value.Type()) {
@@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
+F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
+    switch (result_bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::F16:
+            // Nothing to do
+            return value;
+        case Type::F32:
+            return Inst<F16>(Opcode::ConvertF16F32, value);
+        case Type::F64:
+            throw LogicError("Illegal conversion from F64 to F16");
+        default:
+            break;
+        }
+        break;
+    case 32:
+        switch (value.Type()) {
+        case Type::F16:
+            return Inst<F32>(Opcode::ConvertF32F16, value);
+        case Type::F32:
+            // Nothing to do
+            return value;
+        case Type::F64:
+            return Inst<F32>(Opcode::ConvertF32F64, value);
+        default:
+            break;
+        }
+        break;
+    case 64:
+        switch (value.Type()) {
+        case Type::F16:
+            throw LogicError("Illegal conversion from F16 to F64");
+        case Type::F32:
+            return Inst<F64>(Opcode::ConvertF64F32, value);
+        case Type::F64:
+            // Nothing to do
+            return value;
+        default:
+            break;
+        }
+        break;
+    }
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 03a67985f..5beb99895 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -97,6 +97,7 @@ public:
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                            const Value& e4);
     [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
+    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
 
     [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                                const Value& false_value);
 
@@ -186,7 +187,8 @@ public:
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
-    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
 private:
     IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index aedbc5c3e..acfc0a829 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32,
 OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
 OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
 OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
+OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
+OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
+OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
 OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
 OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
 OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
 OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
 OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
 OPCODE(CompositeExtractF16x4, F16, F16x4,
U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations OPCODE(SelectU8, U8, U1, U8, U8, ) @@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64, OPCODE(ConvertU64F16, U64, F16, ) OPCODE(ConvertU64F32, U64, F32, ) OPCODE(ConvertU64F64, U64, F64, ) - OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
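+// HADD2 adds two pairs of half-precision floats packed into 32-bit registers.
+// Each source may be swizzled first: H1_H0 uses both packed halves, H0_H0 and
+// H1_H1 broadcast one half to both lanes, and F32 reinterprets the register as
+// a single float broadcast to both lanes. Merge controls the writeback: the
+// two results are repacked (H1_H0), kept as a single F32, or inserted into
+// only the low (MRG_H0) or high (MRG_H1) half of the destination register.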
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Merge : u64 {
+    H1_H0,
+    F32,
+    MRG_H0,
+    MRG_H1,
+};
+
+enum class Swizzle : u64 {
+    H1_H0,
+    F32,
+    H0_H0,
+    H1_H1,
+};
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+    switch (swizzle) {
+    case Swizzle::H1_H0: {
+        const IR::Value vector{ir.UnpackFloat2x16(value)};
+        return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+    }
+    case Swizzle::H0_H0: {
+        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+        return {scalar, scalar};
+    }
+    case Swizzle::H1_H1: {
+        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+        return {scalar, scalar};
+    }
+    case Swizzle::F32: {
+        const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+        return {scalar, scalar};
+    }
+    }
+    throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+                    Merge merge) {
+    switch (merge) {
+    case Merge::H1_H0:
+        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+    case Merge::F32:
+        return ir.BitCast<IR::U32>(ir.FPConvert(32, lhs));
+    case Merge::MRG_H0:
+    case Merge::MRG_H1: {
+        const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+        const bool h0{merge == Merge::MRG_H0};
+        const IR::F16& insert{h0 ? lhs : rhs};
+        return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1));
+    }
+    }
+    throw InvalidArgument("Invalid merge {}", merge);
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const hadd2{insn};
+
+    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+    const bool promotion{lhs_a.Type() != lhs_b.Type()};
+    if (promotion) {
+        if (lhs_a.Type() == IR::Type::F16) {
+            lhs_a = v.ir.FPConvert(32, lhs_a);
+            rhs_a = v.ir.FPConvert(32, rhs_a);
+        }
+        if (lhs_b.Type() == IR::Type::F16) {
+            lhs_b = v.ir.FPConvert(32, lhs_b);
+            rhs_b = v.ir.FPConvert(32, rhs_b);
+        }
+    }
+    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+    const IR::FpControl fp_control{
+        .no_contraction{true},
+        .rounding{IR::FpRounding::DontCare},
+        .fmz_mode{ftz ?
IR::FmzMode::FTZ : IR::FmzMode::None},
+    };
+    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+    if (sat) {
+        lhs = v.ir.FPSaturate(lhs);
+        rhs = v.ir.FPSaturate(rhs);
+    }
+    if (promotion) {
+        lhs = v.ir.FPConvert(16, lhs);
+        rhs = v.ir.FPConvert(16, rhs);
+    }
+    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<32, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<31, 1, u64> neg_b;
+        BitField<30, 1, u64> abs_b;
+        BitField<28, 2, Swizzle> swizzle_b;
+    } const hadd2{insn};
+
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+          GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<56, 1, u64> neg_b;
+        BitField<54, 1, u64> abs_b;
+    } const hadd2{insn};
+
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+          GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<56, 1, u64> neg_high;
+        BitField<30, 9, u64> high;
+        BitField<29, 1, u64> neg_low;
+        BitField<20, 9, u64> low;
+    } const hadd2{insn};
+
+    const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+                  static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ?
1 : 0) << 31)};
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+    union {
+        u64 raw;
+        BitField<55, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<56, 1, u64> neg_a;
+        BitField<53, 2, Swizzle> swizzle_a;
+        BitField<20, 32, u64> imm32;
+    } const hadd2{insn};
+
+    const u32 imm{static_cast<u32>(hadd2.imm32)};
+    HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+          hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 727524284..748b856c9 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
     const IR::U64 address{[&]() -> IR::U64 {
         if (mem.e == 0) {
             // LDG/STG without .E uses a 32-bit pointer, zero-extend it
-            return v.ir.ConvertU(64, v.X(mem.addr_reg));
+            return v.ir.UConvert(64, v.X(mem.addr_reg));
         }
         if (!IR::IsAligned(mem.addr_reg, 2)) {
             throw NotImplementedException("Unaligned address register");
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a0535f1c2..c24f29ff7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
     ThrowNotImplemented(Opcode::GETLMEMBASE);
 }
 
-void TranslatorVisitor::HADD2_reg(u64) {
-    ThrowNotImplemented(Opcode::HADD2_reg);
-}
-
-void TranslatorVisitor::HADD2_cbuf(u64) {
-    ThrowNotImplemented(Opcode::HADD2_cbuf);
-}
-
-void TranslatorVisitor::HADD2_imm(u64) {
-    ThrowNotImplemented(Opcode::HADD2_imm);
-}
-
-void TranslatorVisitor::HADD2_32I(u64) {
-    ThrowNotImplemented(Opcode::HADD2_32I);
-}
-
 void TranslatorVisitor::HFMA2_reg(u64) {
     ThrowNotImplemented(Opcode::HFMA2_reg);
 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 98e3dfef7..965e52135 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
             offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
         }
     } else {
-        offset = ir.ConvertU(32, IR::U64{inst.Arg(0)});
+        offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
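In the HADD2_imm encoding above, each 9-bit immediate appears to carry the exponent and upper mantissa bits of an fp16 value, with the sign stored separately; shifting the field left by 6 realigns it below the sign bit of its half. A standalone restatement of the same bit assembly (hypothetical helper name, shown only to make the shifts easier to read):

    #include <cstdint>

    // Rebuilds the packed pair of halves exactly as HADD2_imm does: each 9-bit
    // field lands at bits 6-14 of its half, with the sign at bit 15.
    std::uint32_t PackHalf2x16Imm(std::uint32_t low, bool neg_low, std::uint32_t high,
                                  bool neg_high) {
        const std::uint32_t lo_half = (low << 6) | (neg_low ? 1u << 15 : 0u);
        const std::uint32_t hi_half = (high << 6) | (neg_high ? 1u << 15 : 0u);
        return lo_half | (hi_half << 16);
    }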
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index c7032f168..14a5cb50f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeExtractF32x3; case IR::Opcode::CompositeExtractF16x4: return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::CompositeInsertF16x2: + return IR::Opcode::CompositeInsertF32x2; + case IR::Opcode::CompositeInsertF16x3: + return IR::Opcode::CompositeInsertF32x3; + case IR::Opcode::CompositeInsertF16x4: + return IR::Opcode::CompositeInsertF32x4; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: @@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::PackHalf2x16; case IR::Opcode::UnpackFloat2x16: return IR::Opcode::UnpackHalf2x16; + case IR::Opcode::ConvertF32F16: + return IR::Opcode::Identity; + case IR::Opcode::ConvertF16F32: + return IR::Opcode::Identity; default: return op; } From 81f72471e831a0bc4205df6df61e5b510a5c25ac Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 4 Mar 2021 01:02:44 -0500 Subject: [PATCH 052/770] shader: Implement I2I --- src/shader_recompiler/CMakeLists.txt | 1 + .../impl/integer_to_integer_conversion.cpp | 99 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 --- 3 files changed, 100 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index cb73e03fb..51c245a63 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -85,6 +85,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_shift_left.cpp frontend/maxwell/translate/impl/integer_shift_right.cpp frontend/maxwell/translate/impl/integer_short_multiply_add.cpp + frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/logic_operation.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..ca28c6dd9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
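+// I2I converts between 8-, 16- and 32-bit integer formats held in 32-bit
+// registers. The selector chooses which byte (or halfword) of the source to
+// read; the field is sign- or zero-extended as encoded, optionally passed
+// through absolute value and negation, and finally masked down to the
+// destination width. Saturation is rejected as unimplemented below.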
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class IntegerWidth : u64 {
+    Byte,
+    Short,
+    Word,
+};
+
+[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
+    switch (width) {
+    case IntegerWidth::Byte:
+        return ir.Imm32(8);
+    case IntegerWidth::Short:
+        return ir.Imm32(16);
+    case IntegerWidth::Word:
+        return ir.Imm32(32);
+    default:
+        throw NotImplementedException("Invalid width {}", width);
+    }
+}
+
+[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
+                                     IntegerWidth dst_width) {
+    const IR::U32 zero{ir.Imm32(0)};
+    switch (dst_width) {
+    case IntegerWidth::Byte:
+        return ir.BitFieldExtract(src, zero, ir.Imm32(8), false);
+    case IntegerWidth::Short:
+        return ir.BitFieldExtract(src, zero, ir.Imm32(16), false);
+    case IntegerWidth::Word:
+        return ir.BitFieldExtract(src, zero, ir.Imm32(32), false);
+    default:
+        throw NotImplementedException("Invalid width {}", dst_width);
+    }
+}
+
+void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 2, IntegerWidth> dst_fmt;
+        BitField<12, 1, u64> dst_fmt_sign;
+        BitField<10, 2, IntegerWidth> src_fmt;
+        BitField<13, 1, u64> src_fmt_sign;
+        BitField<41, 3, u64> selector;
+        BitField<45, 1, u64> neg;
+        BitField<49, 1, u64> abs;
+        BitField<50, 1, u64> sat;
+    } const i2i{insn};
+
+    if (i2i.sat != 0) {
+        throw NotImplementedException("I2I SAT");
+    }
+    if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
+        throw NotImplementedException("16-bit source format incompatible with selector {}",
+                                      i2i.selector);
+    }
+    if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
+        throw NotImplementedException("32-bit source format incompatible with selector {}",
+                                      i2i.selector);
+    }
+
+    const s32 selector{static_cast<s32>(i2i.selector)};
+    const IR::U32 offset{v.ir.Imm32(selector * 8)};
+    const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
+    IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)};
+    if (i2i.abs) {
+        src_values = v.ir.IAbs(src_values);
+    }
+    if (i2i.neg) {
+        src_values = v.ir.INeg(src_values);
+    }
+
+    const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
+    v.X(i2i.dest_reg, result);
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::I2I_reg(u64 insn) {
+    I2I(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::I2I_cbuf(u64 insn) {
+    I2I(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::I2I_imm(u64 insn) {
+    I2I(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index c24f29ff7..bd7a7a8b7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -337,18 +337,6 @@ void TranslatorVisitor::I2F_imm(u64) {
     ThrowNotImplemented(Opcode::I2F_imm);
 }
 
-void TranslatorVisitor::I2I_reg(u64) {
-    ThrowNotImplemented(Opcode::I2I_reg);
-}
-
-void TranslatorVisitor::I2I_cbuf(u64) {
-    ThrowNotImplemented(Opcode::I2I_cbuf);
-}
-
-void TranslatorVisitor::I2I_imm(u64) {
-    ThrowNotImplemented(Opcode::I2I_imm);
-}
-
 void TranslatorVisitor::IDE(u64) {
     ThrowNotImplemented(Opcode::IDE);
 }
From d1edc16ba87f3247ad220042050bfea2999067ff Mon
Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Thu, 4 Mar 2021 20:12:44 -0300
Subject: [PATCH 053/770] shader: Deduplicate HADD2 code

---
 .../impl/half_floating_point_add.cpp | 39 +++++++++----------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
index 6965adfb3..c292d5e87 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -107,54 +107,52 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool
     }
     v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
 }
+
+void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
+           const IR::U32& src_b) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+    } const hadd2{insn};
+
+    HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
+          hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
+}
 } // Anonymous namespace
 
 void TranslatorVisitor::HADD2_reg(u64 insn) {
     union {
         u64 raw;
-        BitField<49, 2, Merge> merge;
-        BitField<39, 1, u64> ftz;
         BitField<32, 1, u64> sat;
-        BitField<43, 1, u64> neg_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, Swizzle> swizzle_a;
         BitField<31, 1, u64> neg_b;
         BitField<30, 1, u64> abs_b;
         BitField<28, 2, Swizzle> swizzle_b;
     } const hadd2{insn};
 
-    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
-          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+    HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
           GetReg20(insn));
 }
 
 void TranslatorVisitor::HADD2_cbuf(u64 insn) {
     union {
         u64 raw;
-        BitField<49, 2, Merge> merge;
-        BitField<39, 1, u64> ftz;
         BitField<52, 1, u64> sat;
-        BitField<43, 1, u64> neg_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, Swizzle> swizzle_a;
         BitField<56, 1, u64> neg_b;
         BitField<54, 1, u64> abs_b;
     } const hadd2{insn};
 
-    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
-          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+    HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
           GetCbuf(insn));
 }
 
 void TranslatorVisitor::HADD2_imm(u64 insn) {
     union {
         u64 raw;
-        BitField<49, 2, Merge> merge;
-        BitField<39, 1, u64> ftz;
         BitField<52, 1, u64> sat;
-        BitField<43, 1, u64> neg_a;
-        BitField<44, 1, u64> abs_a;
-        BitField<47, 2, Swizzle> swizzle_a;
         BitField<56, 1, u64> neg_high;
         BitField<30, 9, u64> high;
         BitField<29, 1, u64> neg_low;
@@ -163,8 +161,7 @@ void TranslatorVisitor::HADD2_imm(u64 insn) {
     const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
                   static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ?
1 : 0) << 31)}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } void TranslatorVisitor::HADD2_32I(u64 insn) { From 5465cb156107a27df525dfedbfd4e920b7f71253 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 5 Mar 2021 01:15:16 -0500 Subject: [PATCH 054/770] shader: Implement LEA --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 4 +- .../spirv/emit_spirv_bitwise_conversion.cpp | 4 +- .../backend/spirv/emit_spirv_integer.cpp | 8 ++ .../frontend/ir/ir_emitter.cpp | 22 +++- .../frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/load_effective_address.cpp | 100 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 20 ---- 9 files changed, 136 insertions(+), 29 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 51c245a63..a8df03f90 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -86,6 +86,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_shift_right.cpp frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp + frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/logic_operation.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index bed43c094..1f7d84871 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -132,7 +132,7 @@ void EmitBitCastU64F64(EmitContext& ctx); void EmitBitCastF16U16(EmitContext& ctx); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); -void EmitPackUint2x32(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); Id EmitUnpackUint2x32(EmitContext& ctx, Id value); Id EmitPackFloat2x16(EmitContext& ctx, Id value); Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); @@ -229,9 +229,11 @@ Id EmitISub32(EmitContext& ctx, Id a, Id b); void EmitISub64(EmitContext& ctx); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b); +Id EmitShiftRightLogical64(EmitContext& ctx, Id a, Id b); Id EmitShiftRightArithmetic32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index e0d1ba413..93a45d834 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -30,8 +30,8 @@ void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } 
-void EmitPackUint2x32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitPackUint2x32(EmitContext& ctx, Id value) {
+    return ctx.OpBitcast(ctx.U64, value);
 }
 
 Id EmitUnpackUint2x32(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 162fb6a91..f5001cdaa 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -62,6 +62,10 @@ Id EmitINeg32(EmitContext& ctx, Id value) {
     return ctx.OpSNegate(ctx.U32[1], value);
 }
 
+Id EmitINeg64(EmitContext& ctx, Id value) {
+    return ctx.OpSNegate(ctx.U64, value);
+}
+
 Id EmitIAbs32(EmitContext& ctx, Id value) {
     return ctx.OpSAbs(ctx.U32[1], value);
 }
@@ -74,6 +78,10 @@ Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpShiftRightLogical(ctx.U32[1], a, b);
 }
 
+Id EmitShiftRightLogical64(EmitContext& ctx, Id a, Id b) {
+    return ctx.OpShiftRightLogical(ctx.U64, a, b);
+}
+
 Id EmitShiftRightArithmetic32(EmitContext& ctx, Id a, Id b) {
     return ctx.OpShiftRightArithmetic(ctx.U32[1], a, b);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 186920d8f..01f52183c 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -798,8 +798,15 @@ U32 IREmitter::IMul(const U32& a, const U32& b) {
     return Inst<U32>(Opcode::IMul32, a, b);
 }
 
-U32 IREmitter::INeg(const U32& value) {
-    return Inst<U32>(Opcode::INeg32, value);
+U32U64 IREmitter::INeg(const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::INeg32, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::INeg64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 U32 IREmitter::IAbs(const U32& value) {
@@ -810,8 +817,15 @@ U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
     return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
 }
 
-U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) {
-    return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
+    switch (base.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
+    case Type::U64:
+        return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
+    default:
+        ThrowInvalidType(base.Type());
+    }
 }
 
 U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 5beb99895..33bf2a7d0 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -148,10 +148,10 @@ public:
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
-    [[nodiscard]] U32 INeg(const U32& value);
+    [[nodiscard]] U32U64 INeg(const U32U64& value);
     [[nodiscard]] U32 IAbs(const U32& value);
     [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
-    [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
     [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
     [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
diff
--git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index acfc0a829..b51aaaef5 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -233,9 +233,11 @@ OPCODE(ISub32, U32, U32,
 OPCODE(ISub64, U64, U64, U64, )
 OPCODE(IMul32, U32, U32, U32, )
 OPCODE(INeg32, U32, U32, )
+OPCODE(INeg64, U64, U64, )
 OPCODE(IAbs32, U32, U32, )
 OPCODE(ShiftLeftLogical32, U32, U32, U32, )
 OPCODE(ShiftRightLogical32, U32, U32, U32, )
+OPCODE(ShiftRightLogical64, U64, U64, U32, )
 OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
 OPCODE(BitwiseAnd32, U32, U32, U32, )
 OPCODE(BitwiseOr32, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..784588e83
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,100 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
+            bool neg, bool x) {
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> offset_lo_reg;
+        BitField<48, 3, IR::Pred> pred;
+    } const lea{insn};
+
+    if (x) {
+        throw NotImplementedException("LEA.HI X");
+    }
+    if (lea.pred != IR::Pred::PT) {
+        throw NotImplementedException("LEA.LO Pred");
+    }
+
+    const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+    const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
+    const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
+
+    const s32 hi_scale{32 - static_cast<s32>(scale)};
+    const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
+    const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
+
+    IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
+    v.X(lea.dest_reg, result);
+}
+
+void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> offset_lo_reg;
+        BitField<39, 5, u64> scale;
+        BitField<45, 1, u64> neg;
+        BitField<46, 1, u64> x;
+        BitField<48, 3, IR::Pred> pred;
+    } const lea{insn};
+    if (lea.x != 0) {
+        throw NotImplementedException("LEA.LO X");
+    }
+    if (lea.pred != IR::Pred::PT) {
+        throw NotImplementedException("LEA.LO Pred");
+    }
+
+    const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
+    const s32 scale{static_cast<s32>(lea.scale)};
+    const IR::U32 offset{lea.neg != 0 ?
IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd7a7a8b7..62863aff6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -437,26 +437,6 @@ void TranslatorVisitor::LDS(u64) { ThrowNotImplemented(Opcode::LDS); } -void TranslatorVisitor::LEA_hi_reg(u64) { - ThrowNotImplemented(Opcode::LEA_hi_reg); -} - -void TranslatorVisitor::LEA_hi_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_hi_cbuf); -} - -void TranslatorVisitor::LEA_lo_reg(u64) { - ThrowNotImplemented(Opcode::LEA_lo_reg); -} - -void TranslatorVisitor::LEA_lo_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_lo_cbuf); -} - -void TranslatorVisitor::LEA_lo_imm(u64) { - ThrowNotImplemented(Opcode::LEA_lo_imm); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } From 924f0a9149b6777782347be3d2c833a5f8e90058 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 14:48:03 -0500 Subject: [PATCH 055/770] shader: Implement SHF --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 8 +- .../backend/spirv/emit_spirv_integer.cpp | 20 +++-- .../frontend/ir/ir_emitter.cpp | 22 +++++- .../frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/integer_funnel_shift.cpp | 77 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 16 ---- 8 files changed, 119 insertions(+), 31 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index a8df03f90..ddd34e915 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -78,6 +78,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_funnel_shift.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp frontend/maxwell/translate/impl/integer_scaled_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h 
b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 1f7d84871..bf1b5ace6 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -232,9 +232,11 @@ Id EmitINeg32(EmitContext& ctx, Id value);
 Id EmitINeg64(EmitContext& ctx, Id value);
 Id EmitIAbs32(EmitContext& ctx, Id value);
 Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b);
-Id EmitShiftRightLogical64(EmitContext& ctx, Id a, Id b);
-Id EmitShiftRightArithmetic32(EmitContext& ctx, Id a, Id b);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
 Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b);
 Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b);
 Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index f5001cdaa..5ab3b5e86 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -74,16 +74,24 @@ Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) {
     return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift);
 }
 
-Id EmitShiftRightLogical32(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpShiftRightLogical(ctx.U32[1], a, b);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) {
+    return ctx.OpShiftLeftLogical(ctx.U64, base, shift);
 }
 
-Id EmitShiftRightLogical64(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpShiftRightLogical(ctx.U64, a, b);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) {
+    return ctx.OpShiftRightLogical(ctx.U32[1], base, shift);
 }
 
-Id EmitShiftRightArithmetic32(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpShiftRightArithmetic(ctx.U32[1], a, b);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) {
+    return ctx.OpShiftRightLogical(ctx.U64, base, shift);
+}
+
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) {
+    return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift);
+}
+
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
+    return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
 }
 
 Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 01f52183c..1659b7f3b 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -813,8 +813,15 @@ U32 IREmitter::IAbs(const U32& value) {
     return Inst<U32>(Opcode::IAbs32, value);
 }
 
-U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
-    return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
+    switch (base.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
+    case Type::U64:
+        return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
+    default:
+        ThrowInvalidType(base.Type());
+    }
 }
 
 U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
@@ -828,8 +835,15 @@ U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
     }
 }
 
-U32 IREmitter::ShiftRightArithmetic(const
U32& base, const U32& shift) {
-    return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
+    switch (base.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
+    case Type::U64:
+        return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
+    default:
+        ThrowInvalidType(base.Type());
+    }
 }
 
 U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 33bf2a7d0..6e29bf0e2 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -150,9 +150,9 @@ public:
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
     [[nodiscard]] U32 IAbs(const U32& value);
-    [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
+    [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
-    [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
+    [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
     [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
     [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
     [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index b51aaaef5..75f09ebfc 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -236,9 +236,11 @@ OPCODE(INeg32, U32, U32,
 OPCODE(INeg64, U64, U64, )
 OPCODE(IAbs32, U32, U32, )
 OPCODE(ShiftLeftLogical32, U32, U32, U32, )
+OPCODE(ShiftLeftLogical64, U64, U64, U32, )
 OPCODE(ShiftRightLogical32, U32, U32, U32, )
 OPCODE(ShiftRightLogical64, U64, U64, U32, )
 OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
+OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
 OPCODE(BitwiseAnd32, U32, U32, U32, )
 OPCODE(BitwiseOr32, U32, U32, U32, )
 OPCODE(BitwiseXor32, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..d8d6c939e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,77 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
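+// SHF is a funnel shift: the low and high source registers are packed into a
+// 64-bit value, shifted as a unit, and one 32-bit half of the result is
+// written back - the low word for right shifts, the high word for left
+// shifts. In wrap mode the shift amount is masked by max-1; otherwise it is
+// clamped to the encoded maximum (32 or 63) with an unsigned minimum.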
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class MaxShift : u64 {
+    U32,
+    Undefined,
+    U64,
+    S64,
+};
+
+IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
+                    bool right_shift, bool is_signed) {
+    if (!right_shift) {
+        return ir.ShiftLeftLogical(packed_int, safe_shift);
+    }
+    if (is_signed) {
+        return ir.ShiftRightArithmetic(packed_int, safe_shift);
+    }
+    return ir.ShiftRightLogical(packed_int, safe_shift);
+}
+
+void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
+         bool right_shift) {
+    union {
+        u64 insn;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> lo_bits_reg;
+        BitField<37, 2, MaxShift> max_shift;
+        BitField<48, 2, u64> x_mode;
+        BitField<50, 1, u64> wrap;
+    } const shf{insn};
+    if (shf.x_mode != 0) {
+        throw NotImplementedException("SHF X Mode");
+    }
+    if (shf.max_shift == MaxShift::Undefined) {
+        throw NotImplementedException("SHF Use of undefined MaxShift value");
+    }
+    const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
+    const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
+    const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
+    const IR::U32 safe_shift{shf.wrap != 0
+                                 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
+                                 : v.ir.UMin(shift, max_shift)};
+
+    const bool is_signed{shf.max_shift == MaxShift::S64};
+    const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
+    const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
+
+    const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ?
0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 62863aff6..2ab90d1bf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,22 +553,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHF_l_reg(u64) { - ThrowNotImplemented(Opcode::SHF_l_reg); -} - -void TranslatorVisitor::SHF_l_imm(u64) { - ThrowNotImplemented(Opcode::SHF_l_imm); -} - -void TranslatorVisitor::SHF_r_reg(u64) { - ThrowNotImplemented(Opcode::SHF_r_reg); -} - -void TranslatorVisitor::SHF_r_imm(u64) { - ThrowNotImplemented(Opcode::SHF_r_imm); -} - void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } From 7d6ba5b9840a4ba00a9b0f207c1c119d60dcf8b7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 22:01:22 -0500 Subject: [PATCH 056/770] shader: Implement R2P --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 3 +- .../backend/spirv/emit_spirv_integer.cpp | 9 ++- .../backend/spirv/emit_spirv_select.cpp | 4 ++ .../frontend/ir/ir_emitter.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../impl/move_register_to_predicate.cpp | 71 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index ddd34e915..cc38b28ed 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -94,6 +94,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/logic_operation_three_input.cpp frontend/maxwell/translate/impl/move_predicate_to_register.cpp frontend/maxwell/translate/impl/move_register.cpp + frontend/maxwell/translate/impl/move_register_to_predicate.cpp frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/impl/predicate_set_predicate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index bf1b5ace6..92387ca28 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -120,6 +120,7 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id 
cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); @@ -242,7 +243,7 @@ Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 5ab3b5e86..c9de204b0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -114,8 +114,13 @@ Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count); } -Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { - return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; + if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); + } + return result; } Id EmitBitReverse32(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 21cca4455..0ae127d50 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,6 +6,10 @@ namespace Shader::Backend::SPIRV { +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U1, cond, true_value, false_value); +} + Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond, [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) { throw NotImplementedException("SPIR-V Instruction"); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1659b7f3b..f38b46bac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -412,6 +412,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); } switch (true_value.Type()) { + case Type::U1: + return Inst(Opcode::SelectU1, condition, true_value, false_value); case Type::U8: return Inst(Opcode::SelectU8, condition, true_value, false_value); case Type::U16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 75f09ebfc..c4e72c84d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -115,6 +115,7 @@ OPCODE(CompositeInsertF64x3, F64x3, F64x OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations +OPCODE(SelectU1, 
U1, U1, U1, U1, )
 OPCODE(SelectU8, U8, U1, U8, U8, )
 OPCODE(SelectU16, U16, U1, U16, U16, )
 OPCODE(SelectU32, U32, U1, U32, U32, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+    PR,
+    CC,
+};
+
+void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
+    switch (index) {
+    case 0:
+        return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
+    case 1:
+        return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
+    case 2:
+        return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
+    case 3:
+        return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
+    default:
+        throw LogicError("Unreachable R2P index");
+    }
+}
+
+void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<40, 1, Mode> mode;
+        BitField<41, 2, u64> byte_selector;
+    } const r2p{insn};
+    const IR::U32 src{v.X(r2p.src_reg)};
+    const IR::U32 count{v.ir.Imm32(1)};
+    const bool pr_mode{r2p.mode == Mode::PR};
+    const u32 num_items{pr_mode ? 7U : 4U};
+    const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
+    for (u32 index = 0; index < num_items; ++index) {
+        const IR::U32 offset{v.ir.Imm32(offset_base + index)};
+        const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
+        const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
+        const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
+        const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
+        if (pr_mode) {
+            const IR::Pred pred{index};
+            v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
+        } else {
+            SetFlag(v.ir, inv_mask_bit, src_bit, index);
+        }
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::R2P_reg(u64 insn) {
+    R2P(*this, insn, GetReg20(insn));
+}
+
+void TranslatorVisitor::R2P_cbuf(u64 insn) {
+    R2P(*this, insn, GetCbuf(insn));
+}
+
+void TranslatorVisitor::R2P_imm(u64 insn) {
+    R2P(*this, insn, GetImm20(insn));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 2ab90d1bf..fc6030e04 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -513,18 +513,6 @@ void TranslatorVisitor::R2B(u64) {
     ThrowNotImplemented(Opcode::R2B);
 }
 
-void TranslatorVisitor::R2P_reg(u64) {
-    ThrowNotImplemented(Opcode::R2P_reg);
-}
-
-void TranslatorVisitor::R2P_cbuf(u64) {
-    ThrowNotImplemented(Opcode::R2P_cbuf);
-}
-
-void TranslatorVisitor::R2P_imm(u64) {
-    ThrowNotImplemented(Opcode::R2P_imm);
-}
-
 void TranslatorVisitor::RAM(u64) {
     ThrowNotImplemented(Opcode::RAM);
 }
From
ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: [PATCH 057/770] shader: Initial support for textures and TEX --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 69 ++- .../backend/spirv/emit_context.h | 7 + .../backend/spirv/emit_spirv.cpp | 12 + .../backend/spirv/emit_spirv.h | 32 +- .../backend/spirv/emit_spirv_convert.cpp | 48 ++ .../backend/spirv/emit_spirv_image.cpp | 146 +++++ .../backend/spirv/emit_spirv_memory.cpp | 18 +- src/shader_recompiler/environment.h | 2 + src/shader_recompiler/file_environment.cpp | 4 + src/shader_recompiler/file_environment.h | 4 +- .../frontend/ir/ir_emitter.cpp | 133 +++- .../frontend/ir/ir_emitter.h | 21 +- .../frontend/ir/microinstruction.cpp | 73 ++- .../frontend/ir/microinstruction.h | 22 +- src/shader_recompiler/frontend/ir/modifiers.h | 10 + src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 569 +++++++++--------- src/shader_recompiler/frontend/ir/reg.h | 11 + src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/maxwell.inc | 4 +- .../frontend/maxwell/program.cpp | 1 + .../translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_sample.cpp | 232 +++++++ .../ir_opt/collect_shader_info_pass.cpp | 19 + .../global_memory_to_storage_buffer_pass.cpp | 15 +- src/shader_recompiler/ir_opt/passes.h | 2 + src/shader_recompiler/ir_opt/texture_pass.cpp | 199 ++++++ src/shader_recompiler/shader_info.h | 52 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 101 ++++ .../renderer_vulkan/vk_compute_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + .../renderer_vulkan/vk_rasterizer.cpp | 3 +- 33 files changed, 1489 insertions(+), 342 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp create mode 100644 src/shader_recompiler/ir_opt/texture_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index cc38b28ed..fa268d38f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_control_flow.cpp backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp @@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/texture_sample.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp @@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp + ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h profile.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d2dbd56d4..21900d387 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -12,6 +12,43 @@ #include "shader_recompiler/backend/spirv/emit_context.h" namespace 
Shader::Backend::SPIRV {
+namespace {
+Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
+    const spv::ImageFormat format{spv::ImageFormat::Unknown};
+    const Id type{ctx.F32[1]};
+    switch (desc.type) {
+    case TextureType::Color1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format);
+    case TextureType::ColorArray1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format);
+    case TextureType::Color2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format);
+    case TextureType::ColorArray2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format);
+    case TextureType::Color3D:
+        return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format);
+    case TextureType::ColorCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format);
+    case TextureType::ColorArrayCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
+    case TextureType::Shadow1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format);
+    case TextureType::ShadowArray1D:
+        return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format);
+    case TextureType::Shadow2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format);
+    case TextureType::ShadowArray2D:
+        return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format);
+    case TextureType::Shadow3D:
+        return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format);
+    case TextureType::ShadowCube:
+        return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
+    case TextureType::ShadowArrayCube:
+        // The depth flag must be set for shadow samplers; arrayed cubes are no exception
+        return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format);
+    }
+    throw InvalidArgument("Invalid texture type {}", desc.type);
+}
+} // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
     defs[0] = sirit_ctx.Name(base_type, name);
@@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
     u32 binding{};
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
+    DefineTextures(program.info, binding);
     DefineLabels(program);
 }
 
@@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) {
         return value.Inst()->Definition<Id>();
     }
     switch (value.Type()) {
+    case IR::Type::Void:
+        // Void instructions are used for optional arguments (e.g. texture offsets)
+        // They are not meant to be used in the SPIR-V module
+        return Id{};
     case IR::Type::U1:
         return value.U1() ?
true_value : false_value; case IR::Type::U32: @@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); u32 index{}; - for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; - for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of textures"); + } + const Id type{TypeSampledImage(ImageType(*this, desc))}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + for (u32 index = 0; index < desc.count; ++index) { + // TODO: Pass count info + textures.push_back(TextureDefinition{ + .id{id}, + .type{type}, + }); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (const IR::Function& function : program.functions) { for (IR::Block* const block : function.blocks) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d20cf387e..8b3109eb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -29,6 +29,11 @@ private: std::array defs{}; }; +struct TextureDefinition { + Id id; + Id type; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program); @@ -56,6 +61,7 @@ public: std::array cbufs{}; std::array ssbos{}; + std::vector textures; Id workgroup_id{}; Id local_invocation_id{}; @@ -66,6 +72,7 @@ private: void DefineSpecialVariables(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 8097fe82d..a94e9cb2d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -221,6 +221,14 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program workgroup_size[2]); SetupDenormControl(profile, program, ctx, func); 
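// Editor's note (illustrative sketch, not part of the original patch): the
// `binding` counter above is threaded through DefineConstantBuffers,
// DefineStorageBuffers and DefineTextures, so every resource lands in
// descriptor set 0 with consecutive binding indices. Any consumer has to
// declare its Vulkan descriptor set layout in exactly the same order. A
// hypothetical mirror on the host side (function name and the one-descriptor-
// per-buffer assumption are illustrative):

std::vector<VkDescriptorSetLayoutBinding> MakeLayoutBindings(const Shader::Info& info) {
    std::vector<VkDescriptorSetLayoutBinding> bindings;
    u32 binding{};
    const auto add{[&](VkDescriptorType type, u32 count) {
        bindings.push_back({
            .binding = binding,
            .descriptorType = type,
            .descriptorCount = count,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
            .pImmutableSamplers = nullptr,
        });
        binding += count;
    }};
    // Same traversal order as EmitContext: constant buffers, then storage
    // buffers, then textures
    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
        add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1); // assumes non-arrayed buffers
    }
    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
        add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); // assumes non-arrayed buffers
    }
    for (const auto& desc : info.texture_descriptors) {
        add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, desc.count);
    }
    return bindings;
}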
+ if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); return ctx.Assemble(); } @@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitGetSparseFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 92387ca28..69698c478 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage128(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); Id EmitFPAbs16(EmitContext& ctx, Id value); Id EmitFPAbs32(EmitContext& ctx, Id value); Id EmitFPAbs64(EmitContext& ctx, Id value); @@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, Id offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, Id offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, Id offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset); } // namespace 
Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index edcc2a1cc..2aff673aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..5f4783c95 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,146 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, Id offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? 
ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
+            Add(spv::ImageOperandsMask::MinLod, lod_clamp);
+        }
+    }
+
+    void Add(spv::ImageOperandsMask new_mask, Id value) {
+        mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) |
+                                                   static_cast<u32>(new_mask));
+        operands.push_back(value);
+    }
+
+    std::span<const Id> Span() const noexcept {
+        return std::span{operands.data(), operands.size()};
+    }
+
+    spv::ImageOperandsMask Mask() const noexcept {
+        return mask;
+    }
+
+private:
+    boost::container::static_vector<Id, 3> operands;
+    spv::ImageOperandsMask mask{};
+};
+
+Id Texture(EmitContext& ctx, const IR::Value& index) {
+    if (index.IsImmediate()) {
+        const TextureDefinition def{ctx.textures.at(index.U32())};
+        return ctx.OpLoad(def.type, def.id);
+    }
+    throw NotImplementedException("Indirect texture sample");
+}
+
+template <typename MethodPtrType, typename... Args>
+Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
+        Id result_type, Args&&... args) {
+    IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    if (!sparse) {
+        return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
+    }
+    const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
+    const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
+    const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
+    sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
+    sparse->Invalidate();
+    return ctx.OpCompositeExtract(result_type, sample, 1U);
+}
+} // Anonymous namespace
+
+Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
+    throw LogicError("Unreachable instruction");
+}
+
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id bias_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
+                                 offset);
+    return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
+                &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
+                coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id lod_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
+    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
+                coords, operands.Mask(), operands.Span());
+}
+
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id bias_lc, Id offset) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 808c1b401..7d3efc741 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } -void EmitWriteStorage128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + for (u32 element = 0; element < 4; ++element) { + Id index = base_index; + if (element > 0) { + index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); + } + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); + } } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0ba681fb9..0fcb68050 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -12,6 +12,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 5127523f9..21700c72b 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } +u32 FileEnvironment::TextureBoundBuffer() { + throw NotImplementedException("Texture bound buffer serialization"); +} + std::array FileEnvironment::WorkgroupSize() { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index b8c4bbadd..62302bc8e 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -3,7 +3,7 @@ #include #include "common/common_types.h" -#include "environment.h" +#include "shader_recompiler/environment.h" namespace Shader { @@ -14,6 +14,8 @@ public: u64 ReadInstruction(u32 offset) override; + u32 TextureBoundBuffer() override; + std::array WorkgroupSize() override; private: diff --git 
a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -7,11 +7,24 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { - -[[noreturn]] static void ThrowInvalidType(Type type) { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { throw InvalidArgument("Invalid type {}", type); } +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + U1 IREmitter::Imm1(bool value) const { return U1{Value{value}}; } @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst(Opcode::GetSparseFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } +F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16S32, value); + case Type::U64: + return Inst(Opcode::ConvertF16S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32S32, value); + case Type::U64: + return Inst(Opcode::ConvertF32S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64S32, value); + case Type::U64: + return Inst(Opcode::ConvertF64S64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16U32, value); + case Type::U64: + return Inst(Opcode::ConvertF16U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32U32, value); + case Type::U64: + return Inst(Opcode::ConvertF32U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64U32, value); + case Type::U64: + return Inst(Opcode::ConvertF64U64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { + if (is_signed) { + return ConvertSToF(bitsize, value); + } else { + return ConvertUToF(bitsize, value); + } +} + U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { throw NotImplementedException("Conversion from {} 
to {} bits", value.Type(), result_bitsize); } +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod_lc, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -91,6 +91,7 @@ public: [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -159,7 +160,7 @@ public: [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, - bool is_signed); + bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value); @@ -186,10 +187,28 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const 
F16F32F64& value); + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, + const F32& lod, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -10,26 +10,27 @@ #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { - -static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { if (inst && inst->Opcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } -static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { if (dest_inst) { throw LogicError("Only one of each type of pseudo-op allowed"); } dest_inst = pseudo_inst; } -static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { if (inst->Opcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } +} // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { @@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: return true; default: return false; @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { [](const IR::Value& value) { return value.IsImmediate(); }); } -bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst; -} - Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { - // This is faster than doing a search through the block. 
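// Editor's note (sketch, not part of the original patch): pseudo-instructions
// such as GetSparseFromOp observe a flag produced by another instruction, and
// the producer now reaches them through a lazily allocated AssociatedInsts
// block (defined later in microinstruction.h) instead of four inline pointers,
// keeping sizeof(Inst) within its 128-byte budget. Typical backend usage, as
// in the Emit() helper of emit_spirv_image.cpp above:
//
//   IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
//   if (sparse) {
//       // Emit the OpImageSparse* variant, give the pseudo-op its own
//       // definition (the residency code), and invalidate it so it is not
//       // emitted a second time.
//   }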
+ if (!associated_insts) { + return nullptr; + } switch (opcode) { case Opcode::GetZeroFromOp: - CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); - return zero_inst; + CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); + return associated_insts->zero_inst; case Opcode::GetSignFromOp: - CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); - return sign_inst; + CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); + return associated_insts->sign_inst; case Opcode::GetCarryFromOp: - CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); - return carry_inst; + CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); + return associated_insts->carry_inst; case Opcode::GetOverflowFromOp: - CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); - return overflow_inst; + CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); + return associated_insts->overflow_inst; + case Opcode::GetSparseFromOp: + CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); + return associated_insts->sparse_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(inst->zero_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(inst->sign_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(inst->carry_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(inst->overflow_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); break; default: break; @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { Inst* const inst{value.Inst()}; --inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git 
a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 321393dd7..d5336c438 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -22,7 +22,7 @@ namespace Shader::IR {
 
 class Block;
 
-constexpr size_t MAX_ARG_COUNT = 4;
+struct AssociatedInsts;
 
 class Inst : public boost::intrusive::list_base_hook<> {
 public:
@@ -50,6 +50,11 @@ public:
         return op;
     }
 
+    /// Determines if there is a pseudo-operation associated with this instruction.
+    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
+        return associated_insts != nullptr;
+    }
+
     /// Determines whether or not this instruction may have side effects.
     [[nodiscard]] bool MayHaveSideEffects() const noexcept;
 
@@ -60,8 +65,6 @@ public:
     /// Determines if all arguments of this instruction are immediates.
     [[nodiscard]] bool AreAllArgsImmediates() const;
 
-    /// Determines if there is a pseudo-operation associated with this instruction.
-    [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
 
@@ -122,14 +125,21 @@ private:
     u32 definition{};
     union {
         NonTriviallyDummy dummy{};
-        std::array<Value, MAX_ARG_COUNT> args;
         std::vector<std::pair<Block*, Value>> phi_args;
+        std::array<Value, 5> args;
+    };
+    std::unique_ptr<AssociatedInsts> associated_insts;
+};
+static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
+
+struct AssociatedInsts {
+    union {
+        Inst* sparse_inst;
+        Inst* zero_inst{};
     };
-    Inst* zero_inst{};
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
 };
-static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size");
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 44652eae7..ad07700ae 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
 
 namespace Shader::IR {
 
@@ -30,4 +32,12 @@ struct FpControl {
 };
 static_assert(sizeof(FpControl) <= sizeof(u32));
 
+union TextureInstInfo {
+    u32 raw;
+    BitField<0, 8, TextureType> type;
+    BitField<8, 1, u32> has_bias;
+    BitField<16, 1, u32> has_lod_clamp;
+};
+static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
index 1f188411a..8492a13d5 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.cpp
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -14,7 +14,7 @@ namespace {
 struct OpcodeMeta {
     std::string_view name;
     Type type;
-    std::array<Type, 4> arg_types;
+    std::array<Type, 5> arg_types;
 };
 
 using enum Type;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index c4e72c84d..aa011fab1 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -2,301 +2,330 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
-OPCODE(Phi, Opaque, )
-OPCODE(Identity, Opaque, Opaque, )
-OPCODE(Void, Void, )
+// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
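// Editor's note (sketch, not part of the original patch): this file is an
// X-macro table; each consumer defines OPCODE before including it, which is
// why the argument count could grow to five without touching any call site.
// A hypothetical consumer-side expansion, relying on the `using enum Type;`
// visible in the opcodes.cpp hunk above to resolve the bare type tokens:
//
//   enum class Opcode {
//   #define OPCODE(name, result_type, ...) name,
//   #include "opcodes.inc"
//   #undef OPCODE
//   };
//
//   constexpr std::array META_TABLE{
//   #define OPCODE(name, result_type, ...)                                    \
//       OpcodeMeta{.name{#name}, .type{result_type}, .arg_types{__VA_ARGS__}},
//   #include "opcodes.inc"
//   #undef OPCODE
//   };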
+OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) +OPCODE(Return, Void, ) // Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetGotoVariable, U1, U32, ) -OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupId, U32x3, ) -OPCODE(LocalInvocationId, U32x3, ) +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, U32, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(UndefU1, U1, ) -OPCODE(UndefU8, U8, ) -OPCODE(UndefU16, U16, ) -OPCODE(UndefU32, U32, ) -OPCODE(UndefU64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) -OPCODE(LoadStorage32, U32, U32, U32, ) 
-OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) -OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) -OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) -OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) -OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) -OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) -OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) -OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) -OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) -OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, 
F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations -OPCODE(SelectU1, U1, U1, U1, U1, ) -OPCODE(SelectU8, U8, U1, U8, U8, ) -OPCODE(SelectU16, U16, U1, U16, U16, ) -OPCODE(SelectU32, U32, U1, U32, U32, ) -OPCODE(SelectU64, U64, U1, U64, U64, ) -OPCODE(SelectF16, F16, U1, F16, F16, ) -OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackHalf2x16, U32, F32x2, ) -OPCODE(UnpackHalf2x16, F32x2, U32, ) -OPCODE(PackDouble2x32, F64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, F64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) 
-OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) -OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPOrdEqual16, U1, F16, F16, ) -OPCODE(FPOrdEqual32, U1, F32, F32, ) -OPCODE(FPOrdEqual64, U1, F64, F64, ) -OPCODE(FPUnordEqual16, U1, F16, F16, ) -OPCODE(FPUnordEqual32, U1, F32, F32, ) -OPCODE(FPUnordEqual64, U1, F64, F64, ) -OPCODE(FPOrdNotEqual16, U1, F16, F16, ) -OPCODE(FPOrdNotEqual32, U1, F32, F32, ) -OPCODE(FPOrdNotEqual64, U1, F64, F64, ) -OPCODE(FPUnordNotEqual16, U1, F16, F16, ) -OPCODE(FPUnordNotEqual32, U1, F32, F32, ) -OPCODE(FPUnordNotEqual64, U1, F64, F64, ) -OPCODE(FPOrdLessThan16, U1, F16, F16, ) -OPCODE(FPOrdLessThan32, U1, F32, F32, ) -OPCODE(FPOrdLessThan64, U1, F64, F64, ) -OPCODE(FPUnordLessThan16, U1, F16, F16, ) -OPCODE(FPUnordLessThan32, U1, F32, F32, ) -OPCODE(FPUnordLessThan64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) 
-OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) -OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) // Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(INeg64, U64, U64, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftLeftLogical64, U64, U64, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical64, U64, U64, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) -OPCODE(BitReverse32, U32, U32, ) -OPCODE(BitCount32, U32, U32, ) -OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, 
U64, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) -OPCODE(FindSMsb32, U32, U32, ) -OPCODE(FindUMsb32, U32, U32, ) -OPCODE(SMin32, U32, U32, U32, ) -OPCODE(UMin32, U32, U32, U32, ) -OPCODE(SMax32, U32, U32, U32, ) -OPCODE(UMax32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) -OPCODE(ConvertF16F32, F16, F32, ) -OPCODE(ConvertF32F16, F32, F16, ) -OPCODE(ConvertF32F64, F32, F64, ) -OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, 
U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } +[[nodiscard]] constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { return static_cast(reg); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -75,6 +75,7 @@ private: f64 imm_f64; }; }; +static_assert(std::is_trivially_copyable_v); template class TypedValue : public Value { diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") INST(SYNC, "SYNC", "1111 0000 1111 1---") -INST(TEX, "TEX", "1100 00-- --11 1---") -INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -62,6 +62,7 @@ IR::Program 
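Editor's note on the new `Reg` increment operators: the point of adding post-increment to an enum class is that translators can walk consecutive source registers with `v.F(reg++)`, as the texture code later in this patch does. A standalone model:

```cpp
#include <cstdio>

enum class Reg : int { R0, R1, R2 };

constexpr Reg operator+(Reg reg, int num) {
    return static_cast<Reg>(static_cast<int>(reg) + num);
}
constexpr Reg& operator++(Reg& reg) { // pre-increment
    reg = reg + 1;
    return reg;
}
constexpr Reg operator++(Reg& reg, int) { // post-increment returns the old value
    const Reg copy{reg};
    reg = reg + 1;
    return copy;
}

int main() {
    Reg reg{Reg::R0};
    const Reg old{reg++}; // consume R0, leave reg pointing at R1
    std::printf("old=%d now=%d\n", static_cast<int>(old), static_cast<int>(reg)); // old=0 now=1
}
```

One nit worth flagging: the patch marks both overloads `[[nodiscard]]`, so a bare `++reg;` statement will warn; in practice every call site consumes the result, so it holds.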
TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), 
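Editor's note on `MakeCoords`: the array layer always rides in the first source register, converted from integer to float, and the coordinate components follow. A small sketch of how many consecutive registers each texture type consumes (derived directly from the switch above):

```cpp
#include <cstdio>

enum class TextureType { _1D, ARRAY_1D, _2D, ARRAY_2D, _3D, CUBE, ARRAY_CUBE };

// Register count per type, matching MakeCoords: array variants add one
// register up front for the layer index.
constexpr int CoordRegs(TextureType type) {
    switch (type) {
    case TextureType::_1D:
        return 1;
    case TextureType::ARRAY_1D:
    case TextureType::_2D:
        return 2;
    case TextureType::ARRAY_2D:
    case TextureType::_3D:
    case TextureType::CUBE:
        return 3;
    case TextureType::ARRAY_CUBE:
        return 4;
    }
    return 0;
}

int main() {
    std::printf("ARRAY_2D reads %d consecutive registers\n",
                CoordRegs(TextureType::ARRAY_2D)); // 3
}
```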
v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? 
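Editor's note on `MakeOffset`: AOFFI packs one 4-bit texel offset per dimension into a single register, which is why the code extracts nibbles at bits 0, 4, and 8. Hardware texel offsets are signed, covering [-8, 7]; a standalone decode under that assumption:

```cpp
#include <cstdint>
#include <cstdio>

// Extract the signed 4-bit offset for one dimension from the packed word.
int32_t ExtractOffset(uint32_t packed, int component) {
    const uint32_t nibble = (packed >> (component * 4)) & 0xf;
    return static_cast<int32_t>(nibble << 28) >> 28; // sign-extend from bit 3
}

int main() {
    const uint32_t packed = 0x1F8; // z=+1, y=-1, x=-8
    for (int c = 0; c < 3; ++c) {
        std::printf("offset[%d] = %d\n", c, ExtractOffset(packed, c));
    }
}
```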
IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6662ef4cd..960beadd4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { throw NotImplementedException("Constant buffer with non-immediate index"); } break; + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: { + const TextureType type{inst.Flags().type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || + type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 965e52135..2625c0bb2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -226,6 +226,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; if (inst->Opcode() == IR::Opcode::Phi) { // If we are going through a phi node, mark the current block as visited visited.insert(block); @@ -235,15 +236,11 @@ std::optional Track(IR::Block* block, const IR::Value& value, // Already visited, skip continue; } - const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } - } else { - const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return 
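Editor's note on the TEX write-back loop: bit i of the 4-bit mask gates destination register dest+i, so disabled components leave holes rather than packing the results, and depth compares broadcast the scalar to rgb with alpha forced to 1.0. A standalone model of the masking:

```cpp
#include <cstdio>

int main() {
    const unsigned mask = 0b1011; // write components 0, 1, and 3
    const float sample[4]{0.1f, 0.2f, 0.3f, 0.4f};
    float regs[8]{};
    const int dest = 2;
    for (int element = 0; element < 4; ++element) {
        if (((mask >> element) & 1) == 0) {
            continue; // component 2 is skipped; regs[4] stays untouched
        }
        regs[dest + element] = sample[element];
    }
    std::printf("R4=%f R5=%f\n", regs[4], regs[5]); // R4=0.000000 R5=0.400000
}
```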
*storage_buffer; } } return std::nullopt; diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 38106308c..3b7e7306b 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -6,6 +6,7 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" @@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); +void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..80e4ad6a9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,199 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector; + +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional Track(IR::Block* block, const IR::Value& value, + VisitedBlocks& visited) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == 
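Editor's note on the `VisitedBlocks` alias in texture_pass.cpp: it is a `flat_set` backed by a `small_vector`, i.e. a sorted array with inline storage. The same recipe, standalone (the intent behind the choice is an inference, but a common one — visited sets stay tiny, so this gives O(log n) lookups with no heap traffic):

```cpp
#include <functional>

#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>

using Visited = boost::container::flat_set<const int*, std::less<const int*>,
                                           boost::container::small_vector<const int*, 2>>;

int main() {
    int blocks[3]{};
    Visited visited;
    visited.insert(&blocks[0]);
    visited.insert(&blocks[1]);
    return visited.contains(&blocks[2]) ? 1 : 0; // 0: block 2 was never visited
}
```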
IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; + } + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // Already visited, skip + continue; + } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } + return std::nullopt; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if (IsBindless(inst)) { + VisitedBlocks visited; + const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index{env.TextureBoundBuffer()}, + .offset{inst.Arg(0).U32()}, + }; + } + return TextureInst{ + .cbuf{addr}, + .inst{&inst}, + .block{block}, + }; +} + +class Descriptors { +public: + explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + + u32 Add(const TextureDescriptor& descriptor) { + // TODO: Handle arrays + auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { + return descriptor.cbuf_index == existing.cbuf_index && + descriptor.cbuf_offset == existing.cbuf_offset && + descriptor.type == existing.type; + })}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(descriptor); + return static_cast(descriptors.size()) - 1; + } + +private: + TextureDescriptors& descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{program.info.texture_descriptors}; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + const u32 index{descriptors.Add(TextureDescriptor{ + .type{inst->Flags().type}, + .cbuf_index{texture_inst.cbuf.index}, + .cbuf_offset{texture_inst.cbuf.offset}, + .count{1}, + })}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + inst->SetArg(0, IR::Value{index}); + } +} + +} 
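Editor's note on the sort pair in `TexturePass`: a non-stable sort by the minor key (offset) followed by a stable sort by the major key (index) yields lexicographic (index, offset) order — the classic two-pass multi-key trick. Demonstrated standalone:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct Addr {
    unsigned index, offset;
};

int main() {
    std::vector<Addr> v{{1, 8}, {0, 4}, {1, 0}, {0, 0}};
    std::ranges::sort(v, {}, &Addr::offset);        // minor key first
    std::ranges::stable_sort(v, {}, &Addr::index);  // stable pass preserves offset order
    for (const Addr& a : v) {
        std::printf("(%u, %u) ", a.index, a.offset);
    } // prints: (0, 0) (0, 4) (1, 0) (1, 8)
}
```

The stability of the second pass is what carries the first pass's ordering through, so descriptors end up grouped by constant buffer and ascending by offset within each buffer.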
// namespace Shader::Optimization diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 8766bf13e..103a2f0b4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -8,25 +8,51 @@ #include "common/common_types.h" +#include #include namespace Shader { +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + ColorArrayCube, + Shadow1D, + ShadowArray1D, + Shadow2D, + ShadowArray2D, + Shadow3D, + ShadowCube, + ShadowArrayCube, +}; + +struct TextureDescriptor { + TextureType type; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureDescriptors = boost::container::small_vector; + +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; + struct Info { static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{16}; - struct ConstantBufferDescriptor { - u32 index; - u32 count; - }; - - struct StorageBufferDescriptor { - u32 cbuf_index; - u32 cbuf_offset; - u32 count; - }; - bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_fp16{}; @@ -35,12 +61,16 @@ struct Info { bool uses_fp16_denorms_preserve{}; bool uses_fp32_denorms_flush{}; bool uses_fp32_denorms_preserve{}; + bool uses_image_1d{}; + bool uses_sampled_1d{}; + bool uses_sparse_residency{}; u32 constant_buffer_mask{}; boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a658a3276..ef8bef6ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh }); ++binding; } + for (const auto& desc : info.texture_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } return device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( ++binding; offset += sizeof(DescriptorUpdateEntry); } + for (const auto& desc : info.texture_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, .pNext = nullptr, @@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( .set = 0, }); } + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? 
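Editor's note on `TextureHandle`: the raw u32 read from the constant buffer packs both a texture header index (TIC) and a sampler index (TSC). The split below is an assumption about `Tegra::Texture::TextureHandle`'s layout (low 20 bits TIC, upper 12 bits TSC), not spelled out in this hunk:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t handle = 0x00C00025u; // hypothetical raw cbuf value
    const uint32_t tic_id = handle & 0xFFFFFu; // texture header index
    const uint32_t tsc_id = handle >> 20;      // sampler index
    std::printf("image=%u sampler=%u\n", tic_id, tsc_id); // image=37 sampler=12
}
```

When `via_header_index` is set (linked TSC mode), the sampler index is taken from the TIC field instead, which is exactly what the ternary in the constructor does.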
image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; + +VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); +} } // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, @@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.BindHostComputeBuffers(); } +void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, + TextureCache& texture_cache) { + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array image_view_ids; + boost::container::static_vector image_view_indices; + boost::container::static_vector sampler_handles; + + const auto& launch_desc{kepler_compute.launch_description}; + const auto& cbufs{launch_desc.const_buffer_config}; + const bool via_header_index{launch_desc.linked_tsc}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; + const u32 raw_handle{gpu_memory.Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + } + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + size_t index{}; + for (const auto& desc : info.texture_descriptors) { + const VkSampler vk_sampler{sampler_handles[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); + ++index; + } +} + VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index dc045d524..08d73a2a4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,9 +6,11 @@ #include "common/common_types.h" #include 
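Editor's note on `CastType`: every Color/Shadow pair folds to the same Vulkan view type because depth comparison is a sampler property, not a property of the image view's dimensionality. A trimmed standalone version of that fold:

```cpp
#include <cstdio>

enum class TextureType { Color2D, Shadow2D, ColorCube, ShadowCube };
enum class ImageViewType { e2D, Cube };

constexpr ImageViewType CastType(TextureType type) {
    switch (type) {
    case TextureType::Color2D:
    case TextureType::Shadow2D:
        return ImageViewType::e2D; // depth compare doesn't change the view
    case TextureType::ColorCube:
    case TextureType::ShadowCube:
        return ImageViewType::Cube;
    }
    return ImageViewType::e2D;
}

int main() {
    std::printf("%d\n", CastType(TextureType::Shadow2D) == CastType(TextureType::Color2D)); // 1
}
```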
"shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -30,6 +32,8 @@ public: ComputePipeline(const ComputePipeline&) = delete; void ConfigureBufferCache(BufferCache& buffer_cache); + void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2497c2385..bcb7dd2eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,10 @@ public: return gpu_memory.Read(program_base + address); } + u32 TextureBoundBuffer() override { + return kepler_compute.regs.tex_cb_index; + } + std::array WorkgroupSize() override { const auto& qmd{kepler_compute.launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1b662f9f3..c94419d29 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex}; + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; update_descriptor_queue.Acquire(); pipeline->ConfigureBufferCache(buffer_cache); + pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; const auto& qmd{kepler_compute.launch_description}; From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Mar 2021 17:14:57 -0300 Subject: [PATCH 058/770] shader: Partial implementation of LDC --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 63 ++++++-- .../backend/spirv/emit_context.h | 22 ++- .../backend/spirv/emit_spirv.h | 8 +- .../spirv/emit_spirv_context_get_set.cpp | 56 +++++++- .../frontend/ir/ir_emitter.cpp | 22 ++- .../frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 8 +- .../frontend/maxwell/translate/impl/impl.cpp | 16 ++- .../maxwell/translate/impl/load_constant.cpp | 85 +++++++++++ .../translate/impl/not_implemented.cpp | 4 - .../ir_opt/collect_shader_info_pass.cpp | 135 +++++++++++++++++- .../ir_opt/constant_propagation_pass.cpp | 22 ++- .../global_memory_to_storage_buffer_pass.cpp | 2 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- src/shader_recompiler/shader_info.h | 6 + 16 files changed, 405 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fa268d38f..755db5dfa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -88,6 +88,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_shift_right.cpp 
frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp + frontend/maxwell/translate/impl/load_constant.cpp frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 21900d387..278b26b50 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -104,15 +104,23 @@ void EmitContext::DefineCommonTypes(const Info& info) { U1 = Name(TypeBool(), "u1"); - // TODO: Conditionally define these - AddCapability(spv::Capability::Int16); - AddCapability(spv::Capability::Int64); - U16 = Name(TypeInt(16, false), "u16"); - U64 = Name(TypeInt(64, false), "u64"); - F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + if (info.uses_int8) { + AddCapability(spv::Capability::Int8); + U8 = Name(TypeInt(8, false), "u8"); + S8 = Name(TypeInt(8, true), "s8"); + } + if (info.uses_int16) { + AddCapability(spv::Capability::Int16); + U16 = Name(TypeInt(16, false), "u16"); + S16 = Name(TypeInt(16, true), "s16"); + } + if (info.uses_int64) { + AddCapability(spv::Capability::Int64); + U64 = Name(TypeInt(64, false), "u64"); + } if (info.uses_fp16) { AddCapability(spv::Capability::Float16); F16.Define(*this, TypeFloat(16), "f16"); @@ -151,26 +159,51 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } - const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; - Decorate(array_type, spv::Decoration::ArrayStride, 4U); + if (True(info.used_constant_buffer_types & IR::Type::U8)) { + DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } + if (True(info.used_constant_buffer_types & IR::Type::U16)) { + DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); + DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32)) { + DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32)); + } + if (True(info.used_constant_buffer_types & IR::Type::F32)) { + DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); + } + if (True(info.used_constant_buffer_types & IR::Type::U64)) { + DefineConstantBuffers(info, &UniformDefinitions::U64, binding, U64, 'u', sizeof(u64)); + } + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } +} + +void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, "cbuf_block"); + Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - const Id 
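Editor's note on the typed constant buffer definitions: each element type gets its own view of the same 64 KiB buffer, declared as an array with `ArrayStride == sizeof(element)`, so a byte offset becomes element index offset / element_size. A host-side model of the addressing (standalone, not emitter code):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    static uint8_t cbuf[64 * 1024]{};
    const uint32_t value = 0xCAFEBABEu;
    std::memcpy(&cbuf[16], &value, sizeof(value));

    uint32_t u32_elem; // cbuf_block_u32: stride 4, element 16 / 4 == 4
    std::memcpy(&u32_elem, &cbuf[4 * (16 / 4)], sizeof(u32_elem));
    uint16_t u16_elem; // cbuf_block_u16: stride 2, element 16 / 2 == 8
    std::memcpy(&u16_elem, &cbuf[2 * (16 / 2)], sizeof(u16_elem));
    std::printf("0x%x 0x%x\n", u32_elem, u16_elem); // 0xcafebabe 0xbabe (little endian)
}
```

Gating the Int8/Int16/Int64 capabilities and type definitions on the collected `Info` flags (rather than declaring them unconditionally, as the removed TODO did) keeps the emitted module valid on devices that lack those capabilities.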
uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); + const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; + uniform_types.*member_type = uniform_type; - u32 index{}; for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; + const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); - std::fill_n(cbufs.data() + desc.index, desc.count, id); - index += desc.count; + for (size_t i = 0; i < desc.count; ++i) { + cbufs[desc.index + i].*member_type = id; + } binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8b3109eb8..35eca258a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -10,8 +10,8 @@ #include #include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/shader_info.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/shader_info.h" namespace Shader::Backend::SPIRV { @@ -34,6 +34,16 @@ struct TextureDefinition { Id type; }; +struct UniformDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program); @@ -45,7 +55,10 @@ public: Id void_id{}; Id U1{}; + Id U8{}; + Id S8{}; Id U16{}; + Id S16{}; Id U64{}; VectorTypes F32; VectorTypes U32; @@ -56,10 +69,11 @@ public: Id false_value{}; Id u32_zero_value{}; - Id uniform_u32{}; + UniformDefinitions uniform_types; + Id storage_u32{}; - std::array cbufs{}; + std::array cbufs{}; std::array ssbos{}; std::vector textures; @@ -71,6 +85,8 @@ private: void DefineCommonConstants(); void DefineSpecialVariables(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); + void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, + Id type, char type_char, u32 element_size); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 69698c478..aafc59bbb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -34,7 +34,13 @@ void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); -Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id 
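Editor's note on the `Id UniformDefinitions::*member_type` parameter: this is the pointer-to-member idiom, letting one helper fill whichever typed slot the caller picks instead of duplicating the body per type. The idiom in miniature, with `int` standing in for `Id`:

```cpp
#include <cstdio>

struct UniformDefinitions {
    int U8{};
    int U32{};
};

// The caller selects the destination slot by passing a member pointer.
void Define(UniformDefinitions& defs, int UniformDefinitions::*member, int value) {
    defs.*member = value;
}

int main() {
    UniformDefinitions defs;
    Define(defs, &UniformDefinitions::U32, 42);
    std::printf("%d\n", defs.U32); // 42
}
```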
EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx); void EmitSetAttribute(EmitContext& ctx); void EmitGetAttributeIndexed(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index eb9c01c5a..125b58cf7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -30,17 +30,61 @@ void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, + u32 element_size, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; if (!offset.IsImmediate()) { - throw NotImplementedException("Variable constant buffer offset"); + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 log2_element_size{static_cast(std::countr_zero(element_size))}; + const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / 4)}; - const Id cbuf{ctx.cbufs[binding.U32()]}; - const Id access_chain{ctx.OpAccessChain(ctx.uniform_u32, cbuf, ctx.u32_zero_value, imm_offset)}; - return ctx.OpLoad(ctx.U32[1], access_chain); + if (offset.U32() % element_size != 0) { + throw NotImplementedException("Unaligned immediate constant buffer load"); + } + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} + +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); +} + +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); +} + +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); +} + +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); +} + +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.F32[1], 
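Editor's note on the dynamic-offset path in `GetCbuf`: dividing a byte offset by a power-of-two element size is a right shift by log2(size), and `std::countr_zero` computes that shift at emission time. Standalone:

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t byte_offset = 0x48;
    for (const uint32_t element_size : {1u, 2u, 4u, 8u}) {
        const uint32_t shift = static_cast<uint32_t>(std::countr_zero(element_size));
        std::printf("size %u -> element %u\n", element_size, byte_offset >> shift);
    }
}
```

Note the emitted shift is `OpShiftRightArithmetic`; for the non-negative offsets involved it behaves identically to the logical shift shown here. The immediate path instead rejects offsets that are not a multiple of the element size, since the typed array view cannot address mid-element.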
&UniformDefinitions::F32, sizeof(f32), binding, offset); +} + +Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } void EmitGetAttribute(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ae3354c66..33819dd36 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -112,7 +112,27 @@ void IREmitter::SetPred(IR::Pred pred, const U1& value) { } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { - return Inst(Opcode::GetCbuf, binding, byte_offset); + return Inst(Opcode::GetCbufU32, binding, byte_offset); +} + +UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { + switch (bitsize) { + case 8: + return Inst(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); + case 16: + return Inst(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); + case 32: + return Inst(Opcode::GetCbufU32, binding, byte_offset); + case 64: + return Inst(Opcode::GetCbufU64, binding, byte_offset); + default: + throw InvalidArgument("Invalid bit size {}", bitsize); + } +} + +F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { + return Inst(Opcode::GetCbufF32, binding, byte_offset); } U1 IREmitter::GetZFlag() { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index cb2a7710a..e4d110540 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -47,6 +47,9 @@ public: void SetGotoVariable(u32 id, const U1& value); [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); + [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); [[nodiscard]] U1 GetSFlag(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aa011fab1..64bd495ed 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -21,7 +21,13 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetCbufU8, U32, U32, U32, ) +OPCODE(GetCbufS8, U32, U32, U32, ) +OPCODE(GetCbufU16, U32, U32, U32, ) +OPCODE(GetCbufS16, U32, U32, U32, ) +OPCODE(GetCbufU32, U32, U32, U32, ) +OPCODE(GetCbufF32, F32, U32, U32, ) +OPCODE(GetCbufU64, U64, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) OPCODE(GetAttributeIndexed, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index a5a0e1a9b..7564aeeb2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -56,25 +56,32 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } -IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { +static std::pair CbufAddr(u64 insn) { union { u64 raw; BitField<20, 14, s64> offset; BitField<34, 5, u64> binding; } const 
cbuf{insn}; + if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } - const IR::U32 binding{ir.Imm32(static_cast(cbuf.binding))}; - const IR::U32 byte_offset{ir.Imm32(static_cast(cbuf.offset) * 4)}; + const IR::Value binding{static_cast(cbuf.binding)}; + const IR::Value byte_offset{static_cast(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto[binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - return ir.BitCast(GetCbuf(insn)); + const auto[binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); } IR::U32 TranslatorVisitor::GetImm20(u64 insn) { @@ -83,6 +90,7 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; + if (imm.is_negative != 0) { const s64 raw{static_cast(imm.value)}; return ir.Imm32(static_cast(-(1LL << 19) + raw)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..39becf93c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,85 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; + } const ldc{insn}; + + const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + break; + case Size::S8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + break; + case Size::U16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + break; + case Size::S16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + break; + case Size::B32: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + } + break; + } + 
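Editor's note on `GetImm20`'s sign handling: the 19 value bits plus the separate sign bit at bit 56 together form a 20-bit two's complement immediate, so a set sign bit contributes -2^19. As plain arithmetic:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t raw = 0x7FFFF; // all 19 value bits set
    const bool is_negative = true;
    const int32_t imm = static_cast<int32_t>(is_negative ? raw - (1LL << 19) : raw);
    std::printf("%d\n", imm); // -1
}
```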
default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ff429c126..5b153acff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -425,10 +425,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDC(u64) { - ThrowNotImplemented(Opcode::LDC); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 960beadd4..cdbe85221 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -25,18 +25,13 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { - case IR::Opcode::WorkgroupId: - info.uses_workgroup_id = true; - break; - case IR::Opcode::LocalInvocationId: - info.uses_local_invocation_id = true; - break; case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: case IR::Opcode::PackFloat2x16: @@ -75,13 +70,139 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPTrunc64: info.uses_fp64 = true; break; - case IR::Opcode::GetCbuf: + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::UndefU8: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + case IR::Opcode::SelectU8: + info.uses_int8 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::UndefU16: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + case IR::Opcode::SelectU16: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS16F32: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU16F32: + case IR::Opcode::ConvertU16F64: + info.uses_int16 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU64: + case IR::Opcode::UndefU64: + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU16: + case 
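Editor's note on the B64 path of LDC: a 64-bit load lands in the register pair {Rn, Rn+1}, which is why the destination index must be even. A sketch of the rule `IR::IsAligned` presumably enforces, on plain indices:

```cpp
#include <cstdio>

constexpr bool IsAligned(unsigned reg_index, unsigned alignment) {
    return reg_index % alignment == 0;
}

int main() {
    std::printf("R4: %d, R5: %d\n", IsAligned(4, 2), IsAligned(5, 2)); // R4: 1, R5: 0
}
```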
IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + case IR::Opcode::SelectU64: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + case IR::Opcode::IAdd64: + case IR::Opcode::ISub64: + case IR::Opcode::INeg64: + case IR::Opcode::ShiftLeftLogical64: + case IR::Opcode::ShiftRightLogical64: + case IR::Opcode::ShiftRightArithmetic64: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertS64F32: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertU64F32: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertU64U32: + case IR::Opcode::ConvertU32U64: + case IR::Opcode::ConvertF16U64: + case IR::Opcode::ConvertF32U64: + case IR::Opcode::ConvertF64U64: + info.uses_int64 = true; + break; + default: + break; + } + switch (inst.Opcode()) { + case IR::Opcode::WorkgroupId: + info.uses_workgroup_id = true; + break; + case IR::Opcode::LocalInvocationId: + info.uses_local_invocation_id = true; + break; + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU64: { if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } + switch (inst.Opcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + info.used_constant_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + info.used_constant_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::GetCbufU32: + info.used_constant_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::GetCbufF32: + info.used_constant_buffer_types |= IR::Type::F32; + break; + case IR::Opcode::GetCbufU64: + info.used_constant_buffer_types |= IR::Type::U64; + break; + default: + break; + } break; + } case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ae3d5a7d6..7ba9ebe9b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -193,7 +193,7 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. 
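Editor's note on `used_constant_buffer_types`: it accumulates as a bit-flag enum, with the analysis pass ORing in each element type it sees and the emitter testing bits via `True(... & IR::Type::U8)`. A minimal model (the stand-in bit values here are illustrative, not `IR::Type`'s real ones):

```cpp
#include <cstdio>

enum class Type : unsigned { U8 = 1 << 0, U16 = 1 << 1, U32 = 1 << 2 };

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<unsigned>(a) | static_cast<unsigned>(b));
}
constexpr Type operator&(Type a, Type b) {
    return static_cast<Type>(static_cast<unsigned>(a) & static_cast<unsigned>(b));
}
constexpr Type& operator|=(Type& a, Type b) {
    return a = a | b;
}
constexpr bool True(Type t) {
    return static_cast<unsigned>(t) != 0;
}

int main() {
    Type used{};
    used |= Type::U8;
    used |= Type::U32;
    std::printf("%d %d\n", True(used & Type::U8), True(used & Type::U16)); // 1 0
}
```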
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; @@ -207,7 +207,7 @@ void FoldISub32(IR::Inst& inst) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbuf) { + if (op_b->Opcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; @@ -277,7 +277,7 @@ void FoldLogicalNot(IR::Inst& inst) { } } -template +template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { @@ -285,8 +285,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (value.InstRecursive()->Opcode() == reverse) { + if (arg_inst->Opcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } + if constexpr (op == IR::Opcode::BitCastF32U32) { + if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + // Replace the bitcast with a typed constant buffer read + inst.ReplaceOpcode(IR::Opcode::GetCbufF32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } } } @@ -325,9 +335,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ISub32: return FoldISub32(inst); case IR::Opcode::BitCastF32U32: - return FoldBitCast(inst, IR::Opcode::BitCastU32F32); + return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: - return FoldBitCast(inst, IR::Opcode::BitCastF32U32); + return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); case IR::Opcode::SelectU32: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2625c0bb2..5d98d278e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -203,7 +203,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 80e4ad6a9..ec802e02c 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -78,7 +78,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, return std::nullopt; } const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbuf) { + if (inst->Opcode() == IR::Opcode::GetCbufU32) { const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; if (!index.IsImmediate()) { diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 103a2f0b4..adc1d9a64 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -7,6 +7,7 @@ #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/type.h" #include #include @@ -61,10 +62,15 @@ struct Info { bool 
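Editor's note on the new case in `FoldBitCast`: when the pass sees `BitCastF32U32(GetCbufU32(...))`, it retypes the bitcast node itself into a `GetCbufF32` and hands it the read's operands. Doing the rewrite in place (ReplaceOpcode + SetArg) means every existing use of the bitcast stays valid and no new instruction is inserted. A toy model:

```cpp
#include <cassert>
#include <string>
#include <vector>

struct Inst {
    std::string opcode;
    std::vector<unsigned> args; // stand-ins for IR::Value operands
};

int main() {
    const Inst get_cbuf{"GetCbufU32", {3, 16}}; // binding c3, byte offset 16
    Inst bitcast{"BitCastF32U32", {}};
    bitcast.opcode = "GetCbufF32"; // ReplaceOpcode
    bitcast.args = get_cbuf.args;  // SetArg(0) / SetArg(1)
    assert(bitcast.opcode == "GetCbufF32" && bitcast.args[1] == 16);
}
```

The payoff shows up in the SPIR-V backend: the uniform is loaded through the f32 view directly instead of as a u32 followed by an `OpBitcast`.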
uses_fp16_denorms_preserve{}; bool uses_fp32_denorms_flush{}; bool uses_fp32_denorms_preserve{}; + bool uses_int8{}; + bool uses_int16{}; + bool uses_int64{}; bool uses_image_1d{}; bool uses_sampled_1d{}; bool uses_sparse_residency{}; + IR::Type used_constant_buffer_types{}; + u32 constant_buffer_mask{}; boost::container::static_vector From ba8c1d2eb479d04b2b0d847efd67468b688765d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:42:17 -0500 Subject: [PATCH 059/770] shader: Implement FCMP still need to configure some settings for NV denorm flush and intel NaN --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 1 + .../spirv/emit_spirv_floating_point.cpp | 4 + .../frontend/ir/ir_emitter.cpp | 80 ++++++++---- .../frontend/ir/ir_emitter.h | 19 ++- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/floating_point_compare.cpp | 116 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 16 --- .../ir_opt/collect_shader_info_pass.cpp | 14 ++- 9 files changed, 203 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 755db5dfa..b45ff53b6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -67,6 +67,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp + frontend/maxwell/translate/impl/floating_point_compare.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index aafc59bbb..b09978073 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -232,6 +232,7 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan32(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 749f11742..a359c42fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -346,4 +346,8 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); } +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 33819dd36..5d475207e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -697,93 +697,107 @@ F16F32F64 IREmitter::FPTrunc(const 
F16F32F64& value, FpControl control) { } } -U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, + lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, + lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, + lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, - rhs); + return Inst(ordered ? 
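/* per IEEE-754, ordered compares are false and unordered compares are true
   whenever either operand is NaN */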
Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } @@ -791,20 +805,32 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, boo case Type::F16: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 : Opcode::FPUnordGreaterThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 : Opcode::FPUnordGreaterThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? 
Opcode::FPOrdGreaterThanEqual64 : Opcode::FPUnordGreaterThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } +U1 IREmitter::FPIsNan(const F32& value) { + return Inst(Opcode::FPIsNan32, value); +} + +U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e4d110540..5cfe1a54a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -140,14 +140,21 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); - [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F32& value); + [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 64bd495ed..476281789 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -197,6 +197,7 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +<<<<<<< HEAD OPCODE(FPOrdEqual16, U1, F16, F16, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) @@ -233,6 +234,7 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan32, U1, F32, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 
000000000..21cb80d67 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; + IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? 
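/* FTZ flushes denormal source operands to zero for the comparison */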
IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 5b153acff..e1904472f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,22 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FCMP_reg(u64) { - ThrowNotImplemented(Opcode::FCMP_reg); -} - -void TranslatorVisitor::FCMP_rc(u64) { - ThrowNotImplemented(Opcode::FCMP_rc); -} - -void TranslatorVisitor::FCMP_cr(u64) { - ThrowNotImplemented(Opcode::FCMP_cr); -} - -void TranslatorVisitor::FCMP_imm(u64) { - ThrowNotImplemented(Opcode::FCMP_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cdbe85221..70d75ad6c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -256,7 +256,19 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven32: case IR::Opcode::FPFloor32: case IR::Opcode::FPCeil32: - case IR::Opcode::FPTrunc32: { + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: { const auto control{inst.Flags()}; switch (control.fmz_mode) { case IR::FmzMode::DontCare: From 2d422b2498868e297939c6907a7ef1386ceb1d57 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 13 Mar 2021 02:23:26 -0300 Subject: [PATCH 060/770] shader: Fix rebase issue --- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 476281789..9052a4903 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -197,7 +197,6 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) -<<<<<<< HEAD OPCODE(FPOrdEqual16, U1, F16, F16, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) From 8d470c2e63c2dac334ccff2bcda9a0607ce76377 Mon Sep 17 00:00:00 2001 From: 
ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:23:56 -0500 Subject: [PATCH 061/770] shader: Implement FMNMX And add a const in FCMP --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 8 +-- .../spirv/emit_spirv_floating_point.cpp | 16 +++--- .../frontend/ir/ir_emitter.cpp | 28 +++++++++ .../frontend/ir/ir_emitter.h | 2 + .../translate/impl/floating_point_compare.cpp | 2 +- .../translate/impl/floating_point_min_max.cpp | 57 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 101 insertions(+), 25 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b45ff53b6..171fdd321 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -70,6 +70,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_compare.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/floating_point_min_max.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b09978073..89566c83d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -162,10 +162,10 @@ Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -void EmitFPMax32(EmitContext& ctx); -void EmitFPMax64(EmitContext& ctx); -void EmitFPMin32(EmitContext& ctx); -void EmitFPMin64(EmitContext& ctx); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index a359c42fc..e635b1ffb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -60,20 +60,20 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitFPMax32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F32[1], a, b); } -void EmitFPMax64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); } -void EmitFPMin32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); } -void EmitFPMin64(EmitContext&) { - throw 
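/* stub superseded by the OpFMin emission added below */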
NotImplementedException("SPIR-V Instruction"); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); } Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5d475207e..556961fa4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -831,6 +831,34 @@ U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMax32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMax64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMin32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMin64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5cfe1a54a..74fb3dbcb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -155,6 +155,8 @@ public: [[nodiscard]] U1 FPIsNan(const F32& value); [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 21cb80d67..f254ecb3a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -88,7 +88,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c3180a9bd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e1904472f..01ecbb4cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,18 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FMNMX_reg(u64) { - ThrowNotImplemented(Opcode::FMNMX_reg); -} - -void TranslatorVisitor::FMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::FMNMX_cbuf); -} - -void TranslatorVisitor::FMNMX_imm(u64) { - ThrowNotImplemented(Opcode::FMNMX_imm); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } From b9f7bf4472b8e0a5aad1aec3a5ff5bb56470bfff Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:51:40 -0500 Subject: [PATCH 062/770] spirv: Add SignedZeroInfNanPreserve logic --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 ++++++ src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++++ 3 files changed, 12 insertions(+) diff --git 
a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index a94e9cb2d..c7cba6279 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -124,6 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } ctx.AddExtension("SPV_KHR_float_controls"); + if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 917fc1251..c6a143598 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,8 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; + bool support_fp16_signed_zero_nan_preserve{}; + bool support_fp32_signed_zero_nan_preserve{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bcb7dd2eb..5477a2903 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -193,6 +193,10 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; From 71f96fa6366dc6dd306a953bca1b958fb32bc55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Mar 2021 03:41:05 -0300 Subject: [PATCH 063/770] shader: Implement CAL inlining function calls --- src/shader_recompiler/CMakeLists.txt | 6 +- .../backend/spirv/emit_context.cpp | 6 +- .../backend/spirv/emit_spirv.cpp | 17 +- .../frontend/ir/function.cpp | 5 - src/shader_recompiler/frontend/ir/function.h | 18 -- src/shader_recompiler/frontend/ir/program.cpp | 18 +- src/shader_recompiler/frontend/ir/program.h | 5 +- .../frontend/maxwell/control_flow.cpp | 78 +++--- .../frontend/maxwell/control_flow.h | 19 +- .../frontend/maxwell/program.cpp | 71 ++--- .../structured_control_flow.cpp | 254 ++++++++++-------- .../{ir => maxwell}/structured_control_flow.h | 12 +- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../translate/impl/not_implemented.cpp | 4 +- .../ir_opt/collect_shader_info_pass.cpp | 8 +- .../ir_opt/constant_propagation_pass.cpp | 8 +- .../ir_opt/dead_code_elimination_pass.cpp | 10 +- .../global_memory_to_storage_buffer_pass.cpp | 12 +- .../ir_opt/identity_removal_pass.cpp | 4 +- .../ir_opt/lower_fp16_to_fp32.cpp | 8 +- 
src/shader_recompiler/ir_opt/passes.h | 18 +- .../ir_opt/ssa_rewrite_pass.cpp | 5 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 12 +- .../ir_opt/verification_pass.cpp | 16 +- 24 files changed, 286 insertions(+), 330 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/function.cpp delete mode 100644 src/shader_recompiler/frontend/ir/function.h rename src/shader_recompiler/frontend/{ir => maxwell}/structured_control_flow.cpp (74%) rename src/shader_recompiler/frontend/{ir => maxwell}/structured_control_flow.h (50%) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 171fdd321..20409e09a 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -27,8 +27,6 @@ add_library(shader_recompiler STATIC frontend/ir/condition.h frontend/ir/flow_test.cpp frontend/ir/flow_test.h - frontend/ir/function.cpp - frontend/ir/function.h frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp @@ -43,8 +41,6 @@ add_library(shader_recompiler STATIC frontend/ir/program.cpp frontend/ir/program.h frontend/ir/reg.h - frontend/ir/structured_control_flow.cpp - frontend/ir/structured_control_flow.h frontend/ir/type.cpp frontend/ir/type.h frontend/ir/value.cpp @@ -60,6 +56,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h + frontend/maxwell/structured_control_flow.cpp + frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp frontend/maxwell/translate/impl/common_encoding.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 278b26b50..f848c6175 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -262,10 +262,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } void EmitContext::DefineLabels(IR::Program& program) { - for (const IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - block->SetDefinition(OpLabel()); - } + for (IR::Block* const block : program.blocks) { + block->SetDefinition(OpLabel()); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c7cba6279..7e7db9161 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -10,7 +10,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" @@ -199,18 +198,14 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{profile, program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; - // FIXME: Forward declare functions (needs sirit support) - Id func{}; - for (IR::Function& function : program.functions) { - func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function); - for (IR::Block* const block : function.blocks) { - ctx.AddLabel(block->Definition()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } + const Id 
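/* a single OpFunction is enough now that function calls are inlined */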
func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + ctx.AddLabel(block->Definition()); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); } - ctx.OpFunctionEnd(); } + ctx.OpFunctionEnd(); boost::container::small_vector interfaces; const Info& info{program.info}; if (info.uses_workgroup_id) { diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp deleted file mode 100644 index d1fc9461d..000000000 --- a/src/shader_recompiler/frontend/ir/function.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h deleted file mode 100644 index d1f061146..000000000 --- a/src/shader_recompiler/frontend/ir/function.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" - -namespace Shader::IR { - -struct Function { - BlockList blocks; - BlockList post_order_blocks; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 8c301c3a1..5f51aeb5f 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -9,7 +9,8 @@ #include -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::IR { @@ -19,18 +20,13 @@ std::string DumpProgram(const Program& program) { std::map inst_to_index; std::map block_to_index; - for (const IR::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); - ++index; - } + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; } std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 98aab2dc6..bce8b19b3 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -8,13 +8,14 @@ #include -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { - boost::container::small_vector functions; + BlockList blocks; + BlockList post_order_blocks; Info info; }; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index d0dc66330..715c0e92d 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -31,13 +31,12 @@ struct Compare { return lhs.begin < rhs.begin; } }; -} // Anonymous namespace -static u32 BranchOffset(Location pc, Instruction inst) { +u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static void Split(Block* old_block, Block* new_block, Location pc) { +void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } @@ -49,21 +48,19 @@ static void Split(Block* old_block, Block* new_block, Location pc) { .cond{old_block->cond}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, - .ir{nullptr}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, .stack{std::move(old_block->stack)}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{new_block}, .branch_false{nullptr}, - .ir{nullptr}, }; } -static Token OpcodeToken(Opcode opcode) { +Token OpcodeToken(Opcode opcode) { switch (opcode) { case Opcode::PBK: case Opcode::BRK: @@ -89,7 +86,7 @@ static Token OpcodeToken(Opcode opcode) { } } -static bool IsAbsoluteJump(Opcode opcode) { +bool IsAbsoluteJump(Opcode opcode) { switch (opcode) { case Opcode::JCAL: case Opcode::JMP: @@ -100,7 +97,7 @@ static bool IsAbsoluteJump(Opcode opcode) { } } -static bool HasFlowTest(Opcode opcode) { +bool HasFlowTest(Opcode opcode) { switch (opcode) { case Opcode::BRA: case Opcode::BRX: @@ -121,13 +118,14 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string NameOf(const Block& block) { +std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } } +} // Anonymous namespace void Stack::Push(Token token, Location target) { entries.push_back({ @@ -166,26 +164,24 @@ bool Block::Contains(Location pc) const noexcept { return pc >= begin && pc < end; } -Function::Function(Location start_address) +Function::Function(ObjectPool& block_pool, Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block{nullptr}, + .block{block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{true}, + .branch_true{nullptr}, + .branch_false{nullptr}, + })}, .stack{}, }} {} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_} { - functions.emplace_back(start_address); - functions.back().labels.back().block = block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .stack{}, - .cond{IR::Condition{true}}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .ir{nullptr}, - }); + functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -308,11 +304,17 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Location cal_pc{is_absolute ? 
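/* JCAL carries an absolute target address, CAL a PC-relative offset */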
inst.branch.Absolute() : BranchOffset(pc, inst)}; // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist - if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.emplace_back(cal_pc); + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); } - // Handle CAL like a regular instruction - break; + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; } default: break; @@ -348,7 +350,6 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block}, .branch_false{nullptr}, - .ir{nullptr}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -401,16 +402,6 @@ void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } -void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { - const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - // Technically CAL pushes into PRET, but that's implicit in the function call for us - // Insert the function to the function list if it doesn't exist - const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; - if (it == functions.end()) { - functions.emplace_back(cal_pc); - } -} - CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; @@ -455,10 +446,9 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .end{pc}, .end_class{EndClass::Branch}, .stack{stack}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{nullptr}, .branch_false{nullptr}, - .ir{nullptr}, })}; function.labels.push_back(Label{ .address{pc}, @@ -495,6 +485,14 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; case EndClass::Exit: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 209c9e551..fe74f210f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -20,16 +20,13 @@ #include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { -class Block; -} - namespace Shader::Maxwell::Flow { using FunctionId = size_t; enum class EndClass { Branch, + Call, Exit, Return, }; @@ -75,9 +72,14 @@ struct Block : boost::intrusive::set_base_hook< EndClass end_class; Stack stack; IR::Condition cond; - Block* branch_true; - Block* branch_false; - 
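/* flow blocks no longer cache a translated IR block; IR blocks are
   created while the statement tree is built */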
IR::Block* ir; + union { + Block* branch_true; + FunctionId function_call; + }; + union { + Block* branch_false; + Block* return_block; + }; }; struct Label { @@ -87,7 +89,7 @@ struct Label { }; struct Function { - Function(Location start_address); + explicit Function(ObjectPool& block_pool, Location start_address); Location entrypoint; boost::container::small_vector labels; @@ -137,7 +139,6 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); - void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b270bbccd..8bfa64326 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,67 +8,44 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { -namespace { -IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::Function& cfg_function) { - const size_t num_blocks{cfg_function.blocks.size()}; - std::vector blocks(cfg_function.blocks.size()); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - const u32 begin{cfg_block.begin.Offset()}; - const u32 end{cfg_block.end.Offset()}; - blocks[i] = block_pool.Create(inst_pool, begin, end); - cfg_block.ir = blocks[i]; - ++i; + +static void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const IR::BlockList& post_order{program.post_order_blocks}; + std::erase_if(program.blocks, [&](IR::Block* block) { + return std::ranges::find(post_order, block) == post_order.end(); }); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - IR::Block* const block{blocks[i]}; - ++i; - if (cfg_block.end_class != Flow::EndClass::Branch) { - block->SetReturn(); - } else if (cfg_block.cond == IR::Condition{true}) { - block->SetBranch(cfg_block.branch_true->ir); - } else if (cfg_block.cond == IR::Condition{false}) { - block->SetBranch(cfg_block.branch_false->ir); - } else { - block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, - cfg_block.branch_false->ir); - } - }); - return IR::VisitAST(inst_pool, block_pool, blocks, - [&](IR::Block* block) { Translate(env, block); }); } -} // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - auto& functions{program.functions}; - functions.reserve(cfg.Functions().size()); - for (Flow::Function& cfg_function : cfg.Functions()) { - functions.push_back(IR::Function{ - .blocks{TranslateCode(inst_pool, block_pool, env, 
cfg_function)}, - .post_order_blocks{}, - }); - } + program.blocks = VisitAST(inst_pool, block_pool, env, cfg); + program.post_order_blocks = PostOrder(program.blocks); + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite Optimization::LowerFp16ToFp32(program); - for (IR::Function& function : functions) { - function.post_order_blocks = PostOrder(function.blocks); - Optimization::SsaRewritePass(function.post_order_blocks); - } + + Optimization::SsaRewritePass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - for (IR::Function& function : functions) { - Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); - Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); - Optimization::IdentityRemovalPass(function); - Optimization::VerificationPass(function); - } + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::IdentityRemovalPass(program); + Optimization::VerificationPass(program); Optimization::CollectShaderInfoPass(program); return program; } diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp similarity index 74% rename from src/shader_recompiler/frontend/ir/structured_control_flow.cpp rename to src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index bfba55a7e..5f5d9cf17 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -14,11 +14,14 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { +namespace Shader::Maxwell { namespace { struct Statement; @@ -79,7 +82,7 @@ struct Variable {}; #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement #endif struct Statement : ListBaseHook { - Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} Statement(Goto, Statement* cond_, Node label_, Statement* up_) : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} @@ -91,7 +94,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} Statement(Or, Statement* op_a_, Statement* op_b_) : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} @@ -106,10 +109,10 @@ struct Statement : ListBaseHook { } union { - Block* code; + IR::Block* code; Node label; Tree children; - Condition guest_cond; + IR::Condition guest_cond; Statement* op; Statement* op_a; }; @@ -269,9 +272,10 @@ bool SearchNode(const Tree& 
tree, ConstNode stmt, size_t& offset) { class GotoPass { public: - explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) - : pool{stmt_pool} { - std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; + explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, + ObjectPool& block_pool_, ObjectPool& stmt_pool) + : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); } @@ -316,18 +320,20 @@ private: } } // TODO: Remove this - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); + { + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } } // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { @@ -342,63 +348,84 @@ private: } } - std::vector BuildUnorderedTreeGetGotos(std::span blocks) { - // Assume all blocks have two branches + std::vector BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; std::vector gotos; - gotos.reserve(blocks.size() * 2); - - const std::unordered_map labels_map{BuildLabels(blocks)}; - Tree& root{root_stmt.children}; - auto insert_point{root.begin()}; - // Skip all goto variables zero-initialization - std::advance(insert_point, labels_map.size()); - - for (Block* const block : blocks) { - // Skip label - ++insert_point; - // Skip set variable - ++insert_point; - root.insert(insert_point, *pool.Create(block, &root_stmt)); - - if (block->IsTerminationBlock()) { - root.insert(insert_point, *pool.Create(Return{})); - continue; - } - const Condition cond{block->BranchCondition()}; - Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; - if (cond == Condition{true} || cond == Condition{false}) { - const bool is_true{cond == Condition{true}}; - const Block* const branch{is_true ? 
block->TrueBranch() : block->FalseBranch()}; - const Node label{labels_map.at(branch)}; - Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_stmt)); - } else { - Statement* const ident_cond{pool.Create(Identity{}, cond)}; - const Node true_label{labels_map.at(block->TrueBranch())}; - const Node false_label{labels_map.at(block->FalseBranch())}; - Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; - Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_true)); - gotos.push_back(root.insert(insert_point, *goto_false)); - } - } + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); return gotos; } - std::unordered_map BuildLabels(std::span blocks) { - // TODO: Consider storing labels intrusively inside the block - std::unordered_map labels_map; + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector& gotos, Node function_insert_point, + std::optional return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; Tree& root{root_stmt.children}; - u32 label_id{0}; - for (const Block* const block : blocks) { + std::unordered_map local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; - labels_map.emplace(block, root.insert(root.end(), *label)); - Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; - root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); - root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); ++label_id; } - return labels_map; + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + + const u32 begin_offset{block.begin.Offset()}; + const u32 end_offset{block.end.Offset()}; + IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; + root.insert(ip, *pool.Create(ir_block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + 
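/* then fall through with an unconditional goto to the false target */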
gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{})); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + } + } } void UpdateTreeUp(Statement* tree) { @@ -556,11 +583,13 @@ private: return offset; } + ObjectPool& inst_pool; + ObjectPool& block_pool; ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; -Block* TryFindForwardBlock(const Statement& stmt) { +IR::Block* TryFindForwardBlock(const Statement& stmt) { const Tree& tree{stmt.up->children}; const ConstNode end{tree.cend()}; ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; @@ -573,12 +602,12 @@ Block* TryFindForwardBlock(const Statement& stmt) { return nullptr; } -[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { switch (stmt.type) { case StatementType::Identity: return ir.Condition(stmt.guest_cond); case StatementType::Not: - return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); case StatementType::Or: return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: @@ -590,18 +619,18 @@ Block* TryFindForwardBlock(const Statement& stmt) { class TranslatePass { public: - TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, - const std::function& func_, BlockList& block_list_) - : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, + TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, + ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); } private: - void Visit(Statement& parent, Block* continue_block, Block* break_block) { + void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { Tree& tree{parent.children}; - Block* current_block{nullptr}; + IR::Block* current_block{nullptr}; for (auto it = tree.begin(); it != tree.end(); ++it) { Statement& stmt{*it}; @@ -611,11 +640,10 @@ private: break; case StatementType::Code: { if (current_block && current_block != stmt.code) { - IREmitter ir{*current_block}; - ir.Branch(stmt.code); + IR::IREmitter{*current_block}.Branch(stmt.code); } current_block = stmt.code; - func(stmt.code); + Translate(env, stmt.code); block_list.push_back(stmt.code); break; } @@ -623,7 +651,7 @@ private: if (!current_block) { current_block = MergeBlock(parent, stmt); } - IREmitter ir{*current_block}; + IR::IREmitter ir{*current_block}; ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } @@ -632,16 +660,16 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - Block* const merge_block{MergeBlock(parent, stmt)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; // Visit children 
const size_t first_block_index{block_list.size()}; Visit(stmt, merge_block, break_block); // Implement if header block - Block* const first_if_block{block_list.at(first_block_index)}; - IREmitter ir{*current_block}; - const U1 cond{VisitExpr(ir, *stmt.cond)}; + IR::Block* const first_if_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*current_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; ir.SelectionMerge(merge_block); ir.BranchConditional(cond, first_if_block, merge_block); @@ -649,14 +677,14 @@ private: break; } case StatementType::Loop: { - Block* const loop_header_block{block_pool.Create(inst_pool)}; + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; if (current_block) { - IREmitter{*current_block}.Branch(loop_header_block); + IR::IREmitter{*current_block}.Branch(loop_header_block); } block_list.push_back(loop_header_block); - Block* const new_continue_block{block_pool.Create(inst_pool)}; - Block* const merge_block{MergeBlock(parent, stmt)}; + IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; // Visit children const size_t first_block_index{block_list.size()}; @@ -666,14 +694,14 @@ private: block_list.push_back(new_continue_block); // Implement loop header block - Block* const first_loop_block{block_list.at(first_block_index)}; - IREmitter ir{*loop_header_block}; + IR::Block* const first_loop_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*loop_header_block}; ir.LoopMerge(merge_block, new_continue_block); ir.Branch(first_loop_block); // Implement continue block - IREmitter continue_ir{*new_continue_block}; - const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + IR::IREmitter continue_ir{*new_continue_block}; + const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; continue_ir.BranchConditional(continue_cond, ir.block, merge_block); current_block = merge_block; @@ -684,9 +712,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - Block* const skip_block{MergeBlock(parent, stmt)}; + IR::Block* const skip_block{MergeBlock(parent, stmt)}; - IREmitter ir{*current_block}; + IR::IREmitter ir{*current_block}; ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); current_block = skip_block; @@ -697,7 +725,7 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IREmitter{*current_block}.Return(); + IR::IREmitter{*current_block}.Return(); current_block = nullptr; break; } @@ -706,39 +734,37 @@ private: } } if (current_block && continue_block) { - IREmitter ir{*current_block}; - ir.Branch(continue_block); + IR::IREmitter{*current_block}.Branch(continue_block); } } - Block* MergeBlock(Statement& parent, Statement& stmt) { - if (Block* const block{TryFindForwardBlock(stmt)}) { + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + if (IR::Block* const block{TryFindForwardBlock(stmt)}) { return block; } // Create a merge block we can visit later - Block* const block{block_pool.Create(inst_pool)}; + IR::Block* const block{block_pool.Create(inst_pool)}; Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); return block; } ObjectPool& stmt_pool; - ObjectPool& inst_pool; - ObjectPool& block_pool; - const std::function& func; - BlockList& block_list; + ObjectPool& inst_pool; + ObjectPool& block_pool; + Environment& env; + IR::BlockList& block_list; }; } // 
Anonymous namespace -BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span unordered_blocks, - const std::function& func) { +IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; - GotoPass goto_pass{unordered_blocks, stmt_pool}; - BlockList block_list; - TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), - func, block_list}; + GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::BlockList block_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; return block_list; } -} // namespace Shader::IR +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h similarity index 50% rename from src/shader_recompiler/frontend/ir/structured_control_flow.h rename to src/shader_recompiler/frontend/maxwell/structured_control_flow.h index a574c24f7..e4797291e 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -9,14 +9,16 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { +namespace Shader::Maxwell { -[[nodiscard]] BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span unordered_blocks, - const std::function& func); +[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); -} // namespace Shader::IR +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c6253c40c..45d6f5e06 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -62,7 +62,7 @@ public: void BRA(u64 insn); void BRK(u64 insn); void BRX(u64 insn); - void CAL(u64 insn); + void CAL(); void CCTL(u64 insn); void CCTLL(u64 insn); void CONT(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 01ecbb4cc..92da5c7e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,8 +65,8 @@ void TranslatorVisitor::BRX(u64) { ThrowNotImplemented(Opcode::BRX); } -void TranslatorVisitor::CAL(u64) { - ThrowNotImplemented(Opcode::CAL); +void TranslatorVisitor::CAL() { + // CAL is a no-op } void TranslatorVisitor::CCTL(u64) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70d75ad6c..708b6b267 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -296,11 +296,9 @@ void Visit(Info& info, IR::Inst& inst) { void CollectShaderInfoPass(IR::Program& program) { Info& info{program.info}; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { 
- for (IR::Inst& inst : block->Instructions()) { - Visit(info, inst); - } + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); } } } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7ba9ebe9b..a39db2bf1 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -371,9 +371,11 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } } // Anonymous namespace -void ConstantPropagationPass(IR::Block& block) { - for (IR::Inst& inst : block) { - ConstantPropagation(block, inst); +void ConstantPropagationPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + ConstantPropagation(*block, inst); + } } } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 132b2012a..8ad59f42e 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -10,12 +10,14 @@ namespace Shader::Optimization { -void DeadCodeEliminationPass(IR::Block& block) { +void DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (IR::Inst& inst : block | std::views::reverse) { - if (!inst.HasUses() && !inst.MayHaveSideEffects()) { - inst.Invalidate(); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions() | std::views::reverse) { + if (!inst.HasUses() && !inst.MayHaveSideEffects()) { + inst.Invalidate(); + } } } } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 5d98d278e..1faa1ec88 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,14 +351,12 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsGlobalMemory(inst)) { - continue; - } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); } } Info& info{program.info}; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 593efde39..8790b48f2 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -10,10 +10,10 @@ namespace Shader::Optimization { -void IdentityRemovalPass(IR::Function& function) { +void IdentityRemovalPass(IR::Program& program) { std::vector to_invalidate; - for (IR::Block* const block : function.blocks) { + for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const 
size_t num_args{inst->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 14a5cb50f..74acb8bb6 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -77,11 +77,9 @@ IR::Opcode Replace(IR::Opcode op) { } // Anonymous namespace void LowerFp16ToFp32(IR::Program& program) { - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); } } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 3b7e7306b..5c1fc166c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,26 +8,18 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { -template -void PostOrderInvoke(Func&& func, IR::Function& function) { - for (const auto& block : function.post_order_blocks) { - func(*block); - } -} - void CollectShaderInfoPass(IR::Program& program); -void ConstantPropagationPass(IR::Block& block); -void DeadCodeEliminationPass(IR::Block& block); +void ConstantPropagationPass(IR::Program& program); +void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); -void IdentityRemovalPass(IR::Function& function); +void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); -void SsaRewritePass(std::span post_order_blocks); +void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); -void VerificationPass(const IR::Function& function); +void VerificationPass(const IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 19d35b1f8..f89fd51c8 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,7 +23,6 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" @@ -262,9 +261,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { } } // Anonymous namespace -void SsaRewritePass(std::span post_order_blocks) { +void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { VisitBlock(pass, block); } } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index ec802e02c..de9d633e2 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -164,14 +164,12 @@ private: void TexturePass(Environment& env, IR::Program& program) { TextureInstVector to_replace; - for (IR::Function& function : program.functions) 
{ - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsTextureInstruction(inst)) { - continue; - } - to_replace.push_back(MakeInst(env, block, inst)); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; } + to_replace.push_back(MakeInst(env, block, inst)); } } // Sort instructions to visit textures by constant buffer index, then by offset diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 32b56eb57..4080b37cc 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -11,8 +11,8 @@ namespace Shader::Optimization { -static void ValidateTypes(const IR::Function& function) { - for (const auto& block : function.blocks) { +static void ValidateTypes(const IR::Program& program) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { if (inst.Opcode() == IR::Opcode::Phi) { // Skip validation on phi nodes @@ -30,9 +30,9 @@ static void ValidateTypes(const IR::Function& function) { } } -static void ValidateUses(const IR::Function& function) { +static void ValidateUses(const IR::Program& program) { std::map actual_uses; - for (const auto& block : function.blocks) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { @@ -45,14 +45,14 @@ static void ValidateUses(const IR::Function& function) { } for (const auto [inst, uses] : actual_uses) { if (inst->UseCount() != uses) { - throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/); + throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); } } } -void VerificationPass(const IR::Function& function) { - ValidateTypes(function); - ValidateUses(function); +void VerificationPass(const IR::Program& program) { + ValidateTypes(program); + ValidateUses(program); } } // namespace Shader::Optimization From 17a82b56d74afcebaad78ce4754d8ee99ea66f93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Mar 2021 04:54:43 -0300 Subject: [PATCH 064/770] shader: Implement TEXS --- src/shader_recompiler/CMakeLists.txt | 3 +- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../frontend/ir/ir_emitter.cpp | 8 + .../frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/modifiers.h | 3 +- .../translate/impl/not_implemented.cpp | 4 - .../{texture_sample.cpp => texture_fetch.cpp} | 0 .../translate/impl/texture_fetch_swizzled.cpp | 262 ++++++++++++++++++ 8 files changed, 287 insertions(+), 7 deletions(-) rename src/shader_recompiler/frontend/maxwell/translate/impl/{texture_sample.cpp => texture_fetch.cpp} (100%) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 20409e09a..97e9b4c8e 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -102,7 +102,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp - frontend/maxwell/translate/impl/texture_sample.cpp + frontend/maxwell/translate/impl/texture_fetch.cpp + 
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
 frontend/maxwell/translate/translate.cpp
 frontend/maxwell/translate/translate.h
 ir_opt/collect_shader_info_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 5f4783c95..f75152911 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -57,18 +57,27 @@ Id Texture(EmitContext& ctx, const IR::Value& index) {
     throw NotImplementedException("Indirect texture sample");
 }
 
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    if (info.relaxed_precision != 0) {
+        ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
+    }
+    return sample;
+}
+
 template <typename MethodPtrType, typename... Args>
 Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
         Id result_type, Args&&... args) {
     IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
     if (!sparse) {
-        return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
+        return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
     }
     const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
     const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
     const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
     sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
     sparse->Invalidate();
+    Decorate(ctx, inst, sample);
     return ctx.OpCompositeExtract(result_type, sample, 1U);
 }
 } // Anonymous namespace
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 556961fa4..d94596ee9 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -512,6 +512,14 @@ Value IREmitter::UnpackFloat2x16(const U32& value) {
     return Inst(Opcode::UnpackFloat2x16, value);
 }
 
+U32 IREmitter::PackHalf2x16(const Value& vector) {
+    return Inst<U32>(Opcode::PackHalf2x16, vector);
+}
+
+Value IREmitter::UnpackHalf2x16(const U32& value) {
+    return Inst(Opcode::UnpackHalf2x16, value);
+}
+
 F64 IREmitter::PackDouble2x32(const Value& vector) {
     return Inst<F64>(Opcode::PackDouble2x32, vector);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 74fb3dbcb..27ff5a29d 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -115,6 +115,9 @@ public:
     [[nodiscard]] U32 PackFloat2x16(const Value& vector);
     [[nodiscard]] Value UnpackFloat2x16(const U32& value);
 
+    [[nodiscard]] U32 PackHalf2x16(const Value& vector);
+    [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+
     [[nodiscard]] F64 PackDouble2x32(const Value& vector);
     [[nodiscard]] Value UnpackDouble2x32(const F64& value);
 
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index ad07700ae..308c00153 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -36,7 +36,8 @@ union TextureInstInfo {
     u32 raw;
     BitField<0, 8, TextureType> type;
     BitField<8, 1, u32> has_bias;
-    BitField<16, 1, u32> has_lod_clamp;
+    BitField<9, 1, u32> has_lod_clamp;
+    BitField<10, 1, u32> relaxed_precision;
 };
 static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 92da5c7e8..9aa7b836c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) {
     ThrowNotImplemented(Opcode::SYNC);
 }
 
-void TranslatorVisitor::TEXS(u64) {
-    ThrowNotImplemented(Opcode::TEXS);
-}
-
 void TranslatorVisitor::TLD(u64) {
     ThrowNotImplemented(Opcode::TLD);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
similarity index 100%
rename from src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
rename to src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..ac1615b00
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,262 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 {
+    F16,
+    F32,
+};
+
+union Encoding {
+    u64 raw;
+    BitField<59, 1, Precision> precision;
+    BitField<53, 4, u64> encoding;
+    BitField<49, 1, u64> nodep;
+    BitField<28, 8, IR::Reg> dest_reg_b;
+    BitField<0, 8, IR::Reg> dest_reg_a;
+    BitField<8, 8, IR::Reg> src_reg_a;
+    BitField<20, 8, IR::Reg> src_reg_b;
+    BitField<36, 13, u64> cbuf_offset;
+    BitField<50, 3, u64> swizzle;
+};
+
+constexpr unsigned R = 1;
+constexpr unsigned G = 2;
+constexpr unsigned B = 4;
+constexpr unsigned A = 8;
+
+constexpr std::array RG_LUT{
+    R,     //
+    G,     //
+    B,     //
+    A,     //
+    R | G, //
+    R | A, //
+    G | A, //
+    B | A, //
+};
+
+constexpr std::array RGBA_LUT{
+    R | G | B,     //
+    R | G | A,     //
+    R | B | A,     //
+    G | B | A,     //
+    R | G | B | A, //
+};
+
+void CheckAlignment(IR::Reg reg, int alignment) {
+    if (!IR::IsAligned(reg, alignment)) {
+        throw NotImplementedException("Unaligned source register {}", reg);
+    }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) {
+    return v.ir.CompositeConstruct(v.F(regs)...);
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
+    return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+}
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) {
+    const Encoding texs{insn};
+    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))};
+    const IR::F32 zero{v.ir.Imm32(0.0f)};
+    const IR::Reg reg_a{texs.src_reg_a};
+    const IR::Reg reg_b{texs.src_reg_b};
+    IR::TextureInstInfo info{};
+    if (texs.precision == Precision::F16) {
+        info.relaxed_precision.Assign(1);
+    }
+    switch (texs.encoding) {
+    case 0: // 1D.LZ
+        info.type.Assign(TextureType::Color1D);
+        return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info);
+    case 1: // 2D
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+    case 2: // 2D.LZ
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info);
+    case 3: // 2D.LL
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+                                           {}, info);
+    case 4: // 2D.DC
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+                                               {}, {}, {}, info);
+    case 5: // 2D.LL.DC
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+                                               v.F(reg_b + 1), v.F(reg_b), {}, {}, info);
+    case 6: // 2D.LZ.DC
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+                                               zero, {}, {}, info);
+    case 7: // ARRAY_2D
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorArray2D);
+        return v.ir.ImageSampleImplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            {}, {}, {}, info);
+    case 8: // ARRAY_2D.LZ
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorArray2D);
+        return v.ir.ImageSampleExplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            zero, {}, {}, info);
+    case 9: // ARRAY_2D.LZ.DC
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::ShadowArray2D);
+        return v.ir.ImageSampleDrefExplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            v.F(reg_b + 1), zero, {}, {}, info);
+    case 10: // 3D
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color3D);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+                                           {}, info);
+    case 11: // 3D.LZ
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color3D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+                                           {}, info);
+    case 12: // CUBE
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorCube);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+                                           {}, info);
+    case 13: // CUBE.LL
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::ColorCube);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+                                           v.F(reg_b + 1), {}, {}, info);
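+    // Note: the 4-bit encoding field also admits values 14 and 15, which do not
+    // correspond to any TEXS mode handled above (assumption: guest shaders do
+    // not produce them); they fall through to the NotImplemented default below.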
+    default:
+        throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+    }
+}
+
+unsigned Swizzle(u64 insn) {
+    const Encoding texs{insn};
+    const size_t encoding{texs.swizzle};
+    if (texs.dest_reg_b == IR::Reg::RZ) {
+        if (encoding >= RG_LUT.size()) {
+            throw NotImplementedException("Illegal RG encoding {}", encoding);
+        }
+        return RG_LUT[encoding];
+    } else {
+        if (encoding >= RGBA_LUT.size()) {
+            throw NotImplementedException("Illegal RGBA encoding {}", encoding);
+        }
+        return RGBA_LUT[encoding];
+    }
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+    const bool is_shadow{sample.Type() == IR::Type::F32};
+    if (is_shadow) {
+        const bool is_alpha{component == 3};
+        return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
+    } else {
+        return IR::F32{v.ir.CompositeExtract(sample, component)};
+    }
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
+    const Encoding texs{insn};
+    switch (index) {
+    case 0:
+        return texs.dest_reg_a;
+    case 1:
+        CheckAlignment(texs.dest_reg_a, 2);
+        return texs.dest_reg_a + 1;
+    case 2:
+        return texs.dest_reg_b;
+    case 3:
+        CheckAlignment(texs.dest_reg_b, 2);
+        return texs.dest_reg_b + 1;
+    }
+    throw LogicError("Invalid store index {}", index);
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+    const unsigned swizzle{Swizzle(insn)};
+    unsigned store_index{0};
+    for (unsigned component = 0; component < 4; ++component) {
+        if (((swizzle >> component) & 1) == 0) {
+            continue;
+        }
+        const IR::Reg dest{RegStoreComponent32(insn, store_index)};
+        v.F(dest, Extract(v, sample, component));
+        ++store_index;
+    }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
+    return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
+    const unsigned swizzle{Swizzle(insn)};
+    unsigned store_index{0};
+    std::array<IR::F32, 4> swizzled;
+    for (unsigned component = 0; component < 4; ++component) {
+        if (((swizzle >> component) & 1) == 0) {
+            continue;
+        }
+        swizzled[store_index] = Extract(v, sample, component);
+        ++store_index;
+    }
+    const IR::F32 zero{v.ir.Imm32(0.0f)};
+    const Encoding texs{insn};
+    switch (store_index) {
+    case 1:
+        v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+        break;
+    case 2:
+    case 3:
+    case 4:
+        v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
+        switch (store_index) {
+        case 2:
+            break;
+        case 3:
+            v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+            break;
+        case 4:
+            v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+            break;
+        }
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) {
+    const IR::Value sample{Sample(*this, insn)};
+    if (Encoding{insn}.precision == Precision::F32) {
+        Store32(*this, insn, sample);
+    } else {
+        Store16(*this, insn, sample);
+    }
+}
+
+} // namespace Shader::Maxwell

From fa2f6e38f4d465ba6e5efe6c6bd23d8ef39b080d Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Tue, 16 Mar 2021 00:57:07 -0400
Subject: [PATCH 065/770] shader: Implement FSET and FSETP

Also fix an oversight in adding the SignedZeroInfNanPreserve execution
mode.
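For reference, the ordered/unordered split in FPCompareOp follows IEEE-754
comparison semantics: an ordered predicate is false whenever either operand
is NaN, while its unordered counterpart (the *U variants rejected by
IsCompareOpOrdered) is true in that case. A minimal standalone sketch of
that behavior, not part of this patch (helper names are illustrative only):

    #include <cassert>
    #include <cmath>

    // Ordered: a NaN operand forces the comparison to fail.
    bool LessThanOrdered(float a, float b) {
        return !std::isnan(a) && !std::isnan(b) && a < b;
    }

    // Unordered (LTU): a NaN operand forces the comparison to succeed.
    bool LessThanUnordered(float a, float b) {
        return std::isnan(a) || std::isnan(b) || a < b;
    }

    int main() {
        const float nan{std::nanf("")};
        assert(!LessThanOrdered(nan, 1.0f));
        assert(LessThanUnordered(nan, 1.0f));
        assert(LessThanOrdered(1.0f, 2.0f) && LessThanUnordered(1.0f, 2.0f));
    }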
--- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_spirv.cpp | 6 +- .../maxwell/translate/impl/common_funcs.cpp | 48 +++++++++++++ .../maxwell/translate/impl/common_funcs.h | 6 ++ .../translate/impl/floating_point_compare.cpp | 68 ------------------- .../impl/floating_point_compare_and_set.cpp | 65 ++++++++++++++++++ .../impl/floating_point_set_predicate.cpp | 60 ++++++++++++++++ .../frontend/maxwell/translate/impl/impl.h | 19 ++++++ .../translate/impl/not_implemented.cpp | 24 ------- 9 files changed, 204 insertions(+), 94 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 97e9b4c8e..6d2e804ca 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -66,12 +66,14 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp + frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_min_max.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7e7db9161..50c0f7243 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -124,10 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddExtension("SPV_KHR_float_controls"); if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); } if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 9d4ac2e36..af9a8f82c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp } } +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case 
FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index c9ae5c500..f8add3c34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -15,4 +15,10 @@ namespace Shader::Maxwell { const IR::U1& predicate_2, BooleanOp bop); [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, + const IR::F32& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index f254ecb3a..e78e9c4e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -9,74 +9,6 @@ namespace Shader::Maxwell { namespace { -enum class FPCompareOp : u64 { - F, - LT, - EQ, - LE, - GT, - NE, - GE, - NUM, - Nan, - LTU, - EQU, - LEU, - GTU, - NEU, - GEU, - T, -}; - -bool IsCompareOpOrdered(FPCompareOp op) { - switch (op) { - case FPCompareOp::LTU: - case FPCompareOp::EQU: - case FPCompareOp::LEU: - case FPCompareOp::GTU: - case FPCompareOp::NEU: - case FPCompareOp::GEU: - return false; - default: - return true; - } -} - -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, - FPCompareOp compare_op, IR::FpControl control) { - const bool ordered{IsCompareOpOrdered(compare_op)}; - switch (compare_op) { - case FPCompareOp::F: - return ir.Imm1(false); - case FPCompareOp::LT: - case FPCompareOp::LTU: - return ir.FPLessThan(operand_1, operand_2, control, ordered); - case FPCompareOp::EQ: - case FPCompareOp::EQU: - return 
ir.FPEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::LE: - case FPCompareOp::LEU: - return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GT: - case FPCompareOp::GTU: - return ir.FPGreaterThan(operand_1, operand_2, control, ordered); - case FPCompareOp::NE: - case FPCompareOp::NEU: - return ir.FPNotEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GE: - case FPCompareOp::GEU: - return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::NUM: - return ir.FPOrdered(operand_1, operand_2); - case FPCompareOp::Nan: - return ir.FPUnordered(operand_1, operand_2); - case FPCompareOp::T: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { union { u64 insn; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..c5417775e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,65 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? 
one_mask : fp_one}; + + v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..8ff9db843 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fsetp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 45d6f5e06..761b64666 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -35,6 +35,25 @@ enum class PredicateOp : u64 { NonZero, }; +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9aa7b836c..b31928370 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,30 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSET_reg(u64) { - ThrowNotImplemented(Opcode::FSET_reg); -} - -void TranslatorVisitor::FSET_cbuf(u64) { - ThrowNotImplemented(Opcode::FSET_cbuf); -} - -void TranslatorVisitor::FSET_imm(u64) { - ThrowNotImplemented(Opcode::FSET_imm); -} - -void TranslatorVisitor::FSETP_reg(u64) { - ThrowNotImplemented(Opcode::FSETP_reg); -} - -void TranslatorVisitor::FSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::FSETP_cbuf); -} - -void TranslatorVisitor::FSETP_imm(u64) { - ThrowNotImplemented(Opcode::FSETP_imm); -} - void TranslatorVisitor::FSWZADD(u64) { ThrowNotImplemented(Opcode::FSWZADD); } From 8dd0acfaeba9396fb5c1e142a431a2a29f345855 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Mar 2021 01:30:23 -0300 Subject: [PATCH 066/770] shader: Fix instruction transitions in and out of Phi --- .../frontend/ir/microinstruction.cpp | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 88e186f21..5946105d2 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -182,7 +182,7 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { void Inst::Invalidate() { ClearArgs(); - op = Opcode::Void; + ReplaceOpcode(Opcode::Void); } void Inst::ClearArgs() { @@ -206,20 +206,22 @@ void Inst::ClearArgs() { void Inst::ReplaceUsesWith(Value replacement) { Invalidate(); - - op = Opcode::Identity; - + 
ReplaceOpcode(Opcode::Identity); if (!replacement.IsImmediate()) { Use(replacement); } - if (op == Opcode::Phi) { - phi_args[0].second = replacement; - } else { - args[0] = replacement; - } + args[0] = replacement; } void Inst::ReplaceOpcode(IR::Opcode opcode) { + if (opcode == IR::Opcode::Phi) { + throw LogicError("Cannot transition into Phi"); + } + if (op == Opcode::Phi) { + // Transition out of phi arguments into non-phi + std::destroy_at(&phi_args); + std::construct_at(&args); + } op = opcode; } From 32b6c63485626f10b3bc8efb0239064cc781115e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Mar 2021 01:33:25 -0300 Subject: [PATCH 067/770] shader: Reorder phi nodes when redefined as undefined opcodes --- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index f89fd51c8..d09bcec36 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -181,8 +181,16 @@ private: } if (same.IsEmpty()) { // The phi is unreachable or in the start block - const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + // First remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Insert an undef instruction after all phi nodes (to keep phi instructions on top) + const auto first_not_phi{std::ranges::find_if_not(list, IsPhi)}; same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + + // Insert the phi node after the undef opcode, this will be replaced with an identity + list.insert(first_not_phi, phi); } // Reroute all uses of phi to same and remove phi phi.ReplaceUsesWith(same); From 3b7fd3ad0fcb0419c455c16127f43d01b6dc7fc9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 17 Mar 2021 00:53:53 -0400 Subject: [PATCH 068/770] shader: Implement CSET and CSETP --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/frontend/ir/flow_test.h | 5 +- .../frontend/ir/ir_emitter.cpp | 60 +++++++++++++++++-- .../frontend/ir/ir_emitter.h | 1 + .../translate/impl/condition_code_set.cpp | 54 +++++++++++++++++ .../translate/impl/not_implemented.cpp | 8 --- 6 files changed, 114 insertions(+), 15 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6d2e804ca..e4e7749c7 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -63,6 +63,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/common_funcs.cpp frontend/maxwell/translate/impl/common_funcs.h + frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h index ac883da13..09e113773 100644 --- a/src/shader_recompiler/frontend/ir/flow_test.h +++ b/src/shader_recompiler/frontend/ir/flow_test.h @@ -5,12 +5,13 @@ #pragma once #include - #include +#include "common/common_types.h" + 
namespace Shader::IR { -enum class FlowTest { +enum class FlowTest : u64 { F, LT, EQ, diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d94596ee9..958282160 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -169,16 +169,62 @@ void IREmitter::SetOFlag(const U1& value) { static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { - case FlowTest::T: - return ir.Imm1(true); case FlowTest::F: return ir.Imm1(false); + case FlowTest::LT: + return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), + ir.GetOFlag()); case FlowTest::EQ: - // TODO: Test this - return ir.GetZFlag(); + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); + case FlowTest::LE: + return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::GT: + return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), + ir.LogicalNot(ir.GetZFlag())); case FlowTest::NE: - // TODO: Test this return ir.LogicalNot(ir.GetZFlag()); + case FlowTest::GE: + return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); + case FlowTest::NUM: + return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NaN: + return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::LTU: + return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); + case FlowTest::EQU: + return ir.GetZFlag(); + case FlowTest::LEU: + return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); + case FlowTest::GTU: + return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), + ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::NEU: + return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::GEU: + return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), + ir.GetOFlag()); + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::OFF: + return ir.LogicalNot(ir.GetOFlag()); + case FlowTest::LO: + return ir.LogicalNot(ir.GetCFlag()); + case FlowTest::SFF: + return ir.LogicalNot(ir.GetSFlag()); + case FlowTest::LS: + return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); + case FlowTest::HI: + return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::SFT: + return ir.GetSFlag(); + case FlowTest::HS: + return ir.GetCFlag(); + case FlowTest::OFT: + return ir.GetOFlag(); + case FlowTest::RLE: + return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::RGT: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); default: throw NotImplementedException("Flow test {}", flow_test); } @@ -190,6 +236,10 @@ U1 IREmitter::Condition(IR::Condition cond) { return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } +U1 IREmitter::GetFlowTestResult(FlowTest test) { + return GetFlowTest(*this, test); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 27ff5a29d..05263fe8b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -62,6 +62,7 @@ public: void SetOFlag(const U1& value); [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] U1 GetFlowTestResult(FlowTest test); [[nodiscard]] F32 
GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..ea0c40a54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + X(cset.dest_reg, result); +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b31928370..0325f14ea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,14 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::CSET(u64) { - ThrowNotImplemented(Opcode::CSET); -} - -void TranslatorVisitor::CSETP(u64) { - ThrowNotImplemented(Opcode::CSETP); -} - void TranslatorVisitor::DADD_reg(u64) { ThrowNotImplemented(Opcode::DADD_reg); } From 72990df7bad1c81d6ebc51179d34e1bfc71e0caf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 18 Mar 2021 02:53:57 -0400 Subject: [PATCH 069/770] shader: Implement DADD --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/frontend/ir/value.cpp | 8 +++ src/shader_recompiler/frontend/ir/value.h | 1 + 
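
The CSET/CSETP translation above combines two predicate sources: the condition-code flow test and an auxiliary predicate register. A hedged host-side model of the selection logic, restating a few of the GetFlowTest mappings over plain booleans (simplified enums, not the real IR types):

    #include <cstdint>

    enum class FlowTest { F, LT, EQ, NE, T };
    enum class BooleanOp { And, Or, Xor };

    struct Flags { bool z, s, c, o; };  // zero, sign, carry, overflow

    // A few of the GetFlowTest mappings; LT reconstructs a signed less-than
    // from the sign/zero/overflow flags.
    constexpr bool EvalFlowTest(FlowTest test, Flags f) {
        switch (test) {
        case FlowTest::F:  return false;
        case FlowTest::LT: return (f.s && !f.z) != f.o;
        case FlowTest::EQ: return !f.s && f.z;
        case FlowTest::NE: return !f.z;
        case FlowTest::T:  return true;
        }
        return false;
    }

    // Mirrors the role of PredicateCombine: fold in the auxiliary predicate.
    constexpr bool Combine(bool a, bool b, BooleanOp op) {
        return op == BooleanOp::And ? (a && b)
             : op == BooleanOp::Or  ? (a || b)
                                    : (a != b);
    }

    // A passing test yields an all-ones mask (bf == 0) or the bit pattern of
    // 1.0f (bf == 1); a failing test always yields zero.
    constexpr std::uint32_t CsetResult(Flags f, FlowTest test, bool bop_pred,
                                       BooleanOp op, bool bf) {
        const bool pass{Combine(EvalFlowTest(test, f), bop_pred, op)};
        return pass ? (bf ? 0x3f800000u : 0xffffffffu) : 0u;
    }

    static_assert(CsetResult({.z = true, .s = false, .c = false, .o = false},
                             FlowTest::EQ, true, BooleanOp::And, false) == 0xffffffffu);
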
.../maxwell/translate/impl/double_add.cpp | 67 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 52 +++++++++++++- .../frontend/maxwell/translate/impl/impl.h | 3 + .../translate/impl/not_implemented.cpp | 12 ---- 8 files changed, 132 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index e4e7749c7..b870e9937 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -64,6 +64,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.cpp frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp + frontend/maxwell/translate/impl/double_add.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index f848c6175..204389d74 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -94,6 +94,8 @@ Id EmitContext::Def(const IR::Value& value) { return Constant(U32[1], value.U32()); case IR::Type::F32: return Constant(F32[1], value.F32()); + case IR::Type::F64: + return Constant(F64[1], value.F64()); default: throw NotImplementedException("Immediate type {}", value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 791ba2690..e8e4662e7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -153,6 +153,14 @@ u64 Value::U64() const { return imm_u64; } +f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + ValidateAccess(Type::F64); + return imm_f64; +} + bool Value::operator==(const Value& other) const { if (type != other.type) { return false; diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 3602883d6..b27601e70 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -52,6 +52,7 @@ public: [[nodiscard]] u32 U32() const; [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; + [[nodiscard]] f64 F64() const; [[nodiscard]] bool operator==(const Value& other) const; [[nodiscard]] bool operator!=(const Value& other) const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..bece191d7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
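
DADD, added below, carries each double as a pair of 32-bit registers that the IR packs before and unpacks after the 64-bit operation. A small sketch of the packing convention, assuming the low word is element 0 (as the register-pair construction below suggests) and with std::bit_cast standing in for the IR's PackDouble2x32/UnpackDouble2x32:

    #include <array>
    #include <bit>
    #include <cstdint>

    constexpr double PackDouble2x32(std::array<std::uint32_t, 2> words) {
        // words[0] is the low half, words[1] the high half of the f64 bits
        return std::bit_cast<double>(std::uint64_t{words[1]} << 32 | words[0]);
    }

    constexpr std::array<std::uint32_t, 2> UnpackDouble2x32(double value) {
        const std::uint64_t bits{std::bit_cast<std::uint64_t>(value)};
        return {static_cast<std::uint32_t>(bits), static_cast<std::uint32_t>(bits >> 32)};
    }

    static_assert(
        UnpackDouble2x32(PackDouble2x32({0x12345678u, 0x9abcdef0u}))[0] == 0x12345678u);

This is also why the translation rejects unaligned registers: a double must start on an even register so that the pair {reg, reg + 1} stays inside one aligned slot.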
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + + if (!IR::IsAligned(dadd.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); + } + if (!IR::IsAligned(dadd.src_a_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); + } + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::Reg reg_a{dadd.src_a_reg}; + const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dadd.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)}; + const IR::Value result{v.ir.UnpackDouble2x32(value)}; + + for (int i = 0; i < 2; i++) { + v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7564aeeb2..e444dcd4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -7,6 +7,15 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); @@ -56,6 +65,18 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> src; + } const index{insn}; + const IR::Reg reg{index.src}; + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; @@ -75,15 +96,31 @@ static std::pair CbufAddr(u64 insn) { } IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 
insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetFloatCbuf(binding, byte_offset); } +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -110,6 +147,17 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { return ir.Imm32(Common::BitCast(value | sign_bit)); } +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast(value | sign_bit)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 761b64666..e3e298c3b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -351,12 +351,15 @@ public: [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0325f14ea..9675cef54 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,18 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DADD_reg(u64) { - ThrowNotImplemented(Opcode::DADD_reg); -} - -void TranslatorVisitor::DADD_cbuf(u64) { - ThrowNotImplemented(Opcode::DADD_cbuf); -} - -void TranslatorVisitor::DADD_imm(u64) { - ThrowNotImplemented(Opcode::DADD_imm); -} - void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: [PATCH 070/770] shader: Add partial rasterizer integration --- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/spirv/emit_context.cpp | 64 ++- .../backend/spirv/emit_context.h | 18 +- .../backend/spirv/emit_spirv.cpp | 46 +- .../backend/spirv/emit_spirv.h | 18 +- .../spirv/emit_spirv_context_get_set.cpp | 55 ++- .../backend/spirv/emit_spirv_control_flow.cpp | 23 +- src/shader_recompiler/environment.h | 14 + .../frontend/ir/attribute.cpp | 2 +- 
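
The D20 immediate form decoded above places the 19 encoded bits at the top of the double's bit pattern, with a separate sign bit. A hedged standalone model of GetDoubleImm20, with std::bit_cast standing in for Common::BitCast:

    #include <bit>
    #include <cstdint>

    // Bits [38:20] of the instruction form the immediate; bit 56 is the sign.
    constexpr double DecodeDoubleImm20(std::uint64_t insn) {
        const std::uint64_t value{(insn >> 20) & 0x7ffff};
        const std::uint64_t sign_bit{((insn >> 56) & 1) != 0 ? (1ull << 63) : 0};
        return std::bit_cast<double>((value << 44) | sign_bit);
    }

    // 0x3ff00 << 44 == 0x3ff0000000000000, the bit pattern of 1.0.
    static_assert(DecodeDoubleImm20(std::uint64_t{0x3ff00} << 20) == 1.0);

Shifting by 44 leaves the low 44 mantissa bits zero, so D20 immediates can only express doubles whose significand fits in the top bits, which is consistent with how the F20 float immediate only keeps the high 19 bits.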
src/shader_recompiler/frontend/ir/attribute.h | 2 +- .../frontend/ir/ir_emitter.cpp | 14 + .../frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 11 +- src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/ir/reg.h | 4 +- .../frontend/maxwell/control_flow.cpp | 31 +- .../frontend/maxwell/control_flow.h | 3 +- .../frontend/maxwell/program.cpp | 1 + .../maxwell/structured_control_flow.cpp | 18 + .../frontend/maxwell/translate/impl/exit.cpp | 15 - .../maxwell/translate/impl/exit_program.cpp | 43 ++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++- .../translate/impl/not_implemented.cpp | 16 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 60 ++- .../ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/program_header.h | 143 ++++++ src/shader_recompiler/recompiler.cpp | 28 -- src/shader_recompiler/recompiler.h | 20 - src/shader_recompiler/shader_info.h | 10 + src/shader_recompiler/stage.h | 19 + src/video_core/CMakeLists.txt | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- .../renderer_vulkan/maxwell_to_vk.cpp | 24 + .../renderer_vulkan/maxwell_to_vk.h | 2 + .../renderer_vulkan/pipeline_helper.h | 162 +++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++------ .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 ++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 348 ++++++++++---- .../renderer_vulkan/vk_pipeline_cache.h | 82 +++- .../renderer_vulkan/vk_rasterizer.cpp | 47 +- .../renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 ++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +-- .../renderer_vulkan/vk_texture_cache.h | 29 +- .../vulkan_common/vulkan_device.cpp | 15 + 54 files changed, 1929 insertions(+), 568 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp create mode 100644 src/shader_recompiler/program_header.h delete mode 100644 src/shader_recompiler/recompiler.cpp delete mode 100644 src/shader_recompiler/recompiler.h create mode 100644 src/shader_recompiler/stage.h create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b870e9937..31c394106 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/exit_program.cpp 
frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h + program_header.h profile.h - recompiler.cpp - recompiler.h shader_info.h ) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 204389d74..6c79b611b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineSpecialVariables(program.info); - - u32 binding{}; + DefineInterfaces(program.info, program.stage); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); - DefineLabels(program); } @@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) { return Constant(F32[1], value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); + case IR::Type::Label: + return value.Label()->Definition(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + if (info.uses_int8) { AddCapability(spv::Capability::Int8); U8 = Name(TypeInt(8, false), "u8"); @@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineSpecialVariables(const Info& info) { - const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; - Decorate(id, spv::Decoration::BuiltIn, builtin); - return id; - }}; +void EmitContext::DefineInterfaces(const Info& info, Stage stage) { + const auto define{ + [this](Id type, std::optional builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + interfaces.push_back(id); + return id; + }}; using namespace std::placeholders; const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; if (info.uses_workgroup_id) { workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); @@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? 
spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = define_input(F32[4], built_in); + } + for (size_t i = 0; i < info.loads_generics.size(); ++i) { + if (info.loads_generics[i]) { + // FIXME: Declare size from input + input_generics[i] = define_input(F32[4], std::nullopt); + Decorate(input_generics[i], spv::Decoration::Location, static_cast(i)); + Name(input_generics[i], fmt::format("in_attr{}", i)); + } + } + if (info.stores_position) { + output_position = define_output(F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = define_output(F32[4], std::nullopt); + Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = define_output(F32[4], std::nullopt); + Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 35eca258a..2d7961ac3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -46,7 +46,7 @@ struct UniformDefinitions { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -71,6 +71,9 @@ public: UniformDefinitions uniform_types; + Id input_f32{}; + Id output_f32{}; + Id storage_u32{}; std::array cbufs{}; @@ -80,10 +83,21 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id input_position{}; + std::array input_generics{}; + + Id output_position{}; + std::array output_generics{}; + + std::array frag_color{}; + Id frag_depth {}; + + std::vector interfaces; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineSpecialVariables(const Info& info); + void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50c0f7243..b8978b94a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.U32(); } else if constexpr (std::is_same_v) { return arg.Label(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); } } @@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{profile, program}; +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, + u32& binding) { + EmitContext ctx{profile, program, binding}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id func{ctx.OpFunction(ctx.void_id, 
spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { @@ -208,28 +211,41 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program } } ctx.OpFunctionEnd(); - boost::container::small_vector interfaces; - const Info& info{program.info}; - if (info.uses_workgroup_id) { - interfaces.push_back(ctx.workgroup_id); - } - if (info.uses_local_invocation_id) { - interfaces.push_back(ctx.local_invocation_id); - } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); - const std::array workgroup_size{env.WorkgroupSize()}; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], - workgroup_size[2]); + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); + } + ctx.AddEntryPoint(execution_model, func, "main", interfaces); SetupDenormControl(profile, program, ctx, func); + const Info& info{program.info}; if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { ctx.AddCapability(spv::Capability::SparseResidency); } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89566c83d..ae121f534 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,18 +16,18 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program); + IR::Program& program, u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, IR::Block* label); -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void 
EmitGetPred(EmitContext& ctx); @@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx); -void EmitSetAttribute(EmitContext& ctx); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitGetAttributeIndexed(EmitContext& ctx); void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 125b58cf7..02d115740 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -5,6 +5,43 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} +} // Anonymous namespace void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); @@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } -void EmitGetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } -void EmitSetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { + ctx.OpStore(OutputAttrPointer(ctx, attr), value); } void EmitGetAttributeIndexed(EmitContext&) { @@ -103,6 +140,16 @@ void 
EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 48755b827..6b81f0169 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,26 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, IR::Block* label) { - ctx.OpBranch(label->Definition()); +void EmitBranch(EmitContext& ctx, Id label) { + ctx.OpBranch(label); } -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { - ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { + ctx.OpBranchConditional(condition, true_label, false_label); } -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { - ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), - spv::LoopControlMask::MaskNone); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { + ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); } -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { - ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { + ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); } void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(continue_label); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0fcb68050..1fcaa56dd 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,6 +3,8 @@ #include #include "common/common_types.h" +#include "shader_recompiler/stage.h" +#include "shader_recompiler/program_header.h" namespace Shader { @@ -15,6 +17,18 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; [[nodiscard]] virtual std::array WorkgroupSize() = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + +protected: + ProgramHeader sph{}; + Stage stage{}; }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 2fb7d576f..4811242ea 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept { return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; } -int GenericAttributeIndex(Attribute attribute) { +u32 
GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index bb2cad6af..34ec7e0cd 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -224,7 +224,7 @@ enum class Attribute : u64 { [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; -[[nodiscard]] int GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 958282160..672836c0b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,12 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::DemoteToHelperInvocation(Block* continue_label) { + block->SetBranch(continue_label); + continue_label->AddImmediatePredecessor(block); + Inst(Opcode::DemoteToHelperInvocation, continue_label); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } @@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + U32 IREmitter::WorkgroupIdX() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 05263fe8b..72af5db37 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -36,6 +36,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void DemoteToHelperInvocation(Block* continue_label); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -67,6 +68,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); + void SetFragDepth(const F32& value); + [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5946105d2..21b7d8a9f 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::DemoteToHelperInvocation: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetFragColor: + case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: case Opcode::WriteGlobalU16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9052a4903..593faca52 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ 
b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(DemoteToHelperInvocation, Void, Label, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) @@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU64, U64, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, F32, ) +OPCODE(GetAttributeIndexed, F32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index bce8b19b3..733513c8b 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" namespace Shader::IR { @@ -17,6 +18,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + Stage stage{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 8fea05f7b..3845ec5fb 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } -[[nodiscard]] constexpr Reg operator++(Reg& reg) { +constexpr Reg operator++(Reg& reg) { reg = reg + 1; return reg; } -[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { +constexpr Reg operator++(Reg& reg, int) { const Reg copy{reg}; reg = reg + 1; return copy; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d..4f6707fae 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - 
EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210f..22f134194 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa64326..0074eb89b 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolid, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d1..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..ea9b33da9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
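
ExitFragment, defined below, walks the SPH render-target mask and consumes one source register per enabled component, starting at R0. A sketch of that register-packing rule, assuming omap_target is the raw 32-bit omap.target field with a 4-bit RGBA mask per render target:

    #include <bit>
    #include <cstdint>

    // The epilogue consumes one register per set mask bit, tightly packed.
    constexpr int EpilogueRegisterCount(std::uint32_t omap_target) {
        int regs{0};
        for (std::uint32_t rt = 0; rt < 8; ++rt) {
            regs += std::popcount((omap_target >> (rt * 4)) & 0xfu);
        }
        return regs;
    }

    static_assert(EpilogueRegisterCount(0x0000000fu) == 4);  // RGBA on target 0
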
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b..ed81d9c36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void 
TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b267..fbbe28632 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { return; } info.constant_buffer_mask |= 1U << index; - info.constant_buffer_descriptors.push_back({ - .index{index}, - .count{1}, - }); + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void GetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.loads_position = true; + break; + default: + throw NotImplementedException("Get attribute {}", attribute); + } +} + +void SetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.stores_position = true; + break; + default: + throw NotImplementedException("Set attribute {}", attribute); + } } void VisitUsages(Info& info, IR::Inst& inst) { @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + GetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetAttribute: + SetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; break; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec36..bab7ca186 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -169,7 +169,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op == same || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { // Unique value or self-reference continue; } diff --git a/src/shader_recompiler/program_header.h 
b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..1544bfa42 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,143 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. 
+ } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + } ps; + + std::array raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp deleted file mode 100644 index 527e19c27..000000000 --- a/src/shader_recompiler/recompiler.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
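For reference, the EnabledOutputComponents helper in the ProgramHeader above treats omap.target as one packed four-bit RGBA write mask per render target. A standalone sketch of that decoding, using hypothetical mask values rather than anything read from a real shader header:

#include <array>
#include <cstdint>
#include <cstdio>

// Each render target owns four consecutive bits of `target`: R, G, B, A.
std::array<bool, 4> DecodeOutputComponents(std::uint32_t target, std::uint32_t rt) {
    const std::uint32_t bits = target >> (rt * 4);
    return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0};
}

int main() {
    const std::uint32_t target = 0xFu | (0x3u << 4); // RT0 writes RGBA, RT1 only RG
    for (std::uint32_t rt = 0; rt < 2; ++rt) {
        const auto mask = DecodeOutputComponents(target, rt);
        std::printf("RT%u: R=%d G=%d B=%d A=%d\n", static_cast<unsigned>(rt), mask[0],
                    mask[1], mask[2], mask[3]);
    }
    return 0;
}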
-
-#include <vector>
-
-#include "common/common_types.h"
-#include "shader_recompiler/backend/spirv/emit_spirv.h"
-#include "shader_recompiler/environment.h"
-#include "shader_recompiler/frontend/maxwell/control_flow.h"
-#include "shader_recompiler/frontend/maxwell/program.h"
-#include "shader_recompiler/object_pool.h"
-#include "shader_recompiler/recompiler.h"
-
-namespace Shader {
-
-std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env,
-                                                 u32 start_address) {
-    ObjectPool<Maxwell::Flow::Block> flow_block_pool;
-    ObjectPool<IR::Inst> inst_pool;
-    ObjectPool<IR::Block> block_pool;
-
-    Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
-    IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
-    return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)};
-}
-
-} // namespace Shader
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h
deleted file mode 100644
index 2529463ae..000000000
--- a/src/shader_recompiler/recompiler.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "shader_recompiler/environment.h"
-#include "shader_recompiler/profile.h"
-#include "shader_recompiler/shader_info.h"
-
-namespace Shader {
-
-[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile,
-                                                               Environment& env, u32 start_address);
-
-} // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index adc1d9a64..6eff762e2 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -56,6 +56,15 @@ struct Info {
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
+
+    std::array<bool, 32> loads_generics{};
+    bool loads_position{};
+
+    std::array<bool, 8> stores_frag_color{};
+    bool stores_frag_depth{};
+    std::array<bool, 32> stores_generics{};
+    bool stores_position{};
+
     bool uses_fp16{};
     bool uses_fp64{};
     bool uses_fp16_denorms_flush{};
@@ -68,6 +77,7 @@ struct Info {
     bool uses_image_1d{};
     bool uses_sampled_1d{};
     bool uses_sparse_residency{};
+    bool uses_demote_to_helper_invocation{};
 
     IR::Type used_constant_buffer_types{};
 
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h
new file mode 100644
index 000000000..fc6ce6043
--- /dev/null
+++ b/src/shader_recompiler/stage.h
@@ -0,0 +1,19 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
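To give a sense of how a backend can consume the usage flags added to shader_info.h above, here is a self-contained sketch; this Info is a trimmed stand-in for Shader::Info, and the printed GLSL-style declarations are purely illustrative:

#include <array>
#include <cstddef>
#include <cstdio>

struct Info {
    std::array<bool, 32> loads_generics{};
    bool loads_position{};
    std::array<bool, 8> stores_frag_color{};
    bool stores_frag_depth{};
};

// Declare only the interface variables the shader actually touches.
void DeclareInterface(const Info& info) {
    for (std::size_t i = 0; i < info.loads_generics.size(); ++i) {
        if (info.loads_generics[i]) {
            std::printf("layout(location = %zu) in vec4 attr%zu;\n", i, i);
        }
    }
    if (info.loads_position) {
        std::printf("// input position is read\n");
    }
    for (std::size_t i = 0; i < info.stores_frag_color.size(); ++i) {
        if (info.stores_frag_color[i]) {
            std::printf("layout(location = %zu) out vec4 frag_color%zu;\n", i, i);
        }
    }
    if (info.stores_frag_depth) {
        std::printf("// gl_FragDepth is written\n");
    }
}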
+ +#pragma once + +namespace Shader { + +enum class Stage { + Compute, + VertexA, + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, +}; + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e6916..71b07c194 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -116,15 +117,18 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 
1 : 0); + depth_format.Assign(static_cast(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast(rt.format); }); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array Mask() const noexcept { + std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ 
b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,162 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include <boost/container/small_vector.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+struct TextureHandle {
+    explicit TextureHandle(u32 data, bool via_header_index) {
+        if (via_header_index) [[likely]] {
+            image = data;
+            sampler = data;
+        } else {
+            const Tegra::Texture::TextureHandle handle{data};
+            image = handle.tic_id;
+            sampler = handle.tsc_id.Value();
+        }
+    }
+
+    u32 image;
+    u32 sampler;
+};
+
+struct DescriptorLayoutTuple {
+    vk::DescriptorSetLayout descriptor_set_layout;
+    vk::PipelineLayout pipeline_layout;
+    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+};
+
+class DescriptorLayoutBuilder {
+public:
+    DescriptorLayoutTuple Create(const vk::Device& device) {
+        DescriptorLayoutTuple result;
+        if (!bindings.empty()) {
+            result.descriptor_set_layout = device.CreateDescriptorSetLayout({
+                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+                .pNext = nullptr,
+                .flags = 0,
+                .bindingCount = static_cast<u32>(bindings.size()),
+                .pBindings = bindings.data(),
+            });
+        }
+        result.pipeline_layout = device.CreatePipelineLayout({
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .setLayoutCount = result.descriptor_set_layout ? 1U : 0U,
+            .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(),
+            .pushConstantRangeCount = 0,
+            .pPushConstantRanges = nullptr,
+        });
+        if (!entries.empty()) {
+            result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({
+                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+                .pNext = nullptr,
+                .flags = 0,
+                .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+                .pDescriptorUpdateEntries = entries.data(),
+                .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+                .descriptorSetLayout = *result.descriptor_set_layout,
+                .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                .pipelineLayout = *result.pipeline_layout,
+                .set = 0,
+            });
+        }
+        return result;
+    }
+
+    void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+        for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage);
+        }
+        for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage);
+        }
+        for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
+            Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
+        }
+    }
+
+private:
+    void Add(VkDescriptorType type, VkShaderStageFlags stage) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = type,
+            .descriptorCount = 1,
+            .stageFlags = stage,
+            .pImmutableSamplers = nullptr,
+        });
+        entries.push_back(VkDescriptorUpdateTemplateEntryKHR{
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = type,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+
+    boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
+    boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+    u32 binding{};
+    size_t offset{};
+};
+
+inline VideoCommon::ImageViewType CastType(Shader::TextureType type) {
+    switch (type) {
+    case Shader::TextureType::Color1D:
+    case Shader::TextureType::Shadow1D:
+        return VideoCommon::ImageViewType::e1D;
+    case Shader::TextureType::ColorArray1D:
+    case Shader::TextureType::ShadowArray1D:
+        return VideoCommon::ImageViewType::e1DArray;
+    case Shader::TextureType::Color2D:
+    case Shader::TextureType::Shadow2D:
+        return VideoCommon::ImageViewType::e2D;
+    case Shader::TextureType::ColorArray2D:
+    case Shader::TextureType::ShadowArray2D:
+        return VideoCommon::ImageViewType::e2DArray;
+    case Shader::TextureType::Color3D:
+    case Shader::TextureType::Shadow3D:
+        return VideoCommon::ImageViewType::e3D;
+    case Shader::TextureType::ColorCube:
+    case Shader::TextureType::ShadowCube:
+        return VideoCommon::ImageViewType::Cube;
+    case Shader::TextureType::ColorArrayCube:
+    case Shader::TextureType::ShadowArrayCube:
+        return VideoCommon::ImageViewType::CubeArray;
+    }
+    UNREACHABLE_MSG("Invalid texture type {}", type);
+    return {};
+}
+
+inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers,
+                                 const ImageId* image_view_ids, TextureCache& texture_cache,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) {
+    for (const auto& desc : info.texture_descriptors) {
+        const VkSampler sampler{samplers[index]};
+        ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
+        const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
+        update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+        ++index;
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - 
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array image_view_ids; boost::container::static_vector image_view_indices; - boost::container::static_vector sampler_handles; + boost::container::static_vector samplers; const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
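Both pipeline types now route descriptor setup through the DescriptorLayoutBuilder from pipeline_helper.h. A usage sketch, assuming the Device wrapper and two populated Shader::Info objects from this patch (a fragment, not a standalone program):

// Bindings are numbered in Add() call order, so descriptor writes queued
// through VKUpdateDescriptorQueue must follow the same stage order.
DescriptorLayoutTuple MakeVertexFragmentLayout(const Device& device,
                                               const Shader::Info& vertex_info,
                                               const Shader::Info& fragment_info) {
    DescriptorLayoutBuilder builder;
    builder.Add(vertex_info, VK_SHADER_STAGE_VERTEX_BIT);
    builder.Add(fragment_info, VK_SHADER_STAGE_FRAGMENT_BIT);
    // One call returns the set layout, pipeline layout and update template.
    return builder.Create(device.GetLogical());
}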
+
+#include <algorithm>
+#include <span>
+
+#include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
+
+#include "common/bit_field.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/pipeline_helper.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+namespace {
+using boost::container::small_vector;
+using boost::container::static_vector;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) {
+    DescriptorLayoutBuilder builder;
+    for (size_t index = 0; index < infos.size(); ++index) {
+        static constexpr std::array stages{
+            VK_SHADER_STAGE_VERTEX_BIT,
+            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+            VK_SHADER_STAGE_GEOMETRY_BIT,
+            VK_SHADER_STAGE_FRAGMENT_BIT,
+        };
+        builder.Add(infos[index], stages.at(index));
+    }
+    return builder.Create(device.GetLogical());
+}
+
+template <typename StencilFace>
+VkStencilOpState GetStencilFaceState(const StencilFace& face) {
+    return {
+        .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
+        .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
+        .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
+        .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
+        .compareMask = 0,
+        .writeMask = 0,
+        .reference = 0,
+    };
+}
+
+bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
+    static constexpr std::array unsupported_topologies{
+        VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
+        // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
+    };
+    return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
+}
+
+VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
+    union Swizzle {
+        u32 raw;
+        BitField<0, 3, Maxwell::ViewportSwizzle> x;
+        BitField<4, 3, Maxwell::ViewportSwizzle> y;
+        BitField<8, 3, Maxwell::ViewportSwizzle> z;
+        BitField<12, 3, Maxwell::ViewportSwizzle> w;
+    };
+    const Swizzle unpacked{swizzle};
+    return VkViewportSwizzleNV{
+        .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
+        .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
+        .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
+        .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
+    };
+}
+
+PixelFormat DecodeFormat(u8 encoded_format) {
+    const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
+    if (format == Tegra::RenderTargetFormat::NONE) {
+        return PixelFormat::Invalid;
+    }
+    return PixelFormatFromRenderTargetFormat(format);
+}
+
+RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
+    RenderPassKey key;
+    std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
+    if (state.depth_enabled != 0) {
+        const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
+        key.depth_format = PixelFormatFromDepthFormat(depth_format);
+    } else {
+        key.depth_format = PixelFormat::Invalid;
+    }
+    key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
+    return key;
+}
+} // 
Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array stages, + const std::array& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array image_view_ids; + static_vector image_view_indices; + static_vector samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, 
layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = state.dynamic_state; + } + static_vector vertex_bindings; + static_vector vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast( + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? 
mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = 
*pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array stages, + const std::array& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array spv_modules; + std::array stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
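The ref-counted Pipeline base class below goes away; pipeline objects are now owned directly by the cache maps in vk_pipeline_cache.cpp, which look them up with try_emplace. A minimal sketch of that caching pattern with stand-in key and pipeline types (the real keys hash their contents with CityHash):

#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct Key {
    std::uint64_t hash{};
    bool operator==(const Key&) const = default;
};
struct KeyHash {
    std::size_t operator()(const Key& key) const noexcept {
        return static_cast<std::size_t>(key.hash);
    }
};
struct Pipeline {
    bool compiled{};
};

// One map lookup covers both hit and miss; the expensive build runs only
// when the key was newly inserted.
Pipeline* GetOrBuild(std::unordered_map<Key, Pipeline, KeyHash>& cache, const Key& key) {
    const auto [it, is_new] = cache.try_emplace(key);
    if (is_new) {
        it->second = Pipeline{.compiled = true}; // stands in for CreateGraphicsPipeline()
    }
    return &it->second;
}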
- -#pragma once - -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional Analyze(u32 start_address) { + std::optional Analyze(u32 start_address) { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read(program_base + address); + return gpu_memory->Read(program_base + address); } - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array WorkgroupSize() 
override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; - - u32 read_lowest = 0; - u32 read_highest = 0; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; std::vector code; + + u32 read_lowest = std::numeric_limits::max(); + u32 read_highest = 0; + u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + 
std::array WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - 
     return &pipeline;
 }
 
+bool PipelineCache::RefreshStages() {
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+            graphics_key.unique_hashes[index] = u128{};
+            continue;
+        }
+        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+        const GPUVAddr shader_addr{base_addr + shader_config.offset};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        if (!cpu_shader_addr) {
+            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+            return false;
+        }
+        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+        if (!shader_info) {
+            const u32 offset{shader_config.offset};
+            shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr);
+        }
+        graphics_key.unique_hashes[index] = shader_info->unique_hash;
+    }
+    return true;
+}
+
+const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
+                                                u32 start_address, VAddr cpu_addr) {
+    GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+    auto info = std::make_unique<ShaderInfo>();
+    if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) {
+        info->unique_hash = *cached_hash;
+        info->size_bytes = env.CachedSize();
+    } else {
+        // Slow path, not really hit on commercial games
+        // Build a control flow graph to get the real shader size
+        flow_block_pool.ReleaseContents();
+        Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address};
+        info->unique_hash = env.CalculateHash();
+        info->size_bytes = env.ReadSize();
+    }
+    const size_t size_bytes{info->size_bytes};
+    const ShaderInfo* const result{info.get()};
+    Register(std::move(info), cpu_addr, size_bytes);
+    return result;
+}
+
+GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
+    flow_block_pool.ReleaseContents();
+    inst_pool.ReleaseContents();
+    block_pool.ReleaseContents();
+
+    std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs;
+    std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (graphics_key.unique_hashes[index] == u128{}) {
+            continue;
+        }
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+        GraphicsEnvironment& env{envs[index]};
+        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+
+        const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader);
+        Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset);
+        programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg);
+    }
+    std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
+    std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
+
+    u32 binding{0};
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        if (graphics_key.unique_hashes[index] == u128{}) {
+            continue;
+        }
+        UNIMPLEMENTED_IF(index == 0);
+
+        GraphicsEnvironment& env{envs[index]};
+        Shader::IR::Program& program{programs[index]};
+
+        const size_t stage_index{index - 1};
+        infos[stage_index] = &program.info;
+        std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
+
+        FILE* file = fopen("D:\\shader.spv", "wb");
+        fwrite(code.data(), 4, code.size(), file);
+        fclose(file);
+        std::system("spirv-cross --vulkan-semantics D:\\shader.spv");
+
+        modules[stage_index] = BuildShader(device, code);
+    }
+    return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device,
+                            descriptor_pool, update_descriptor_queue, render_pass_cache,
+                            graphics_key.state, std::move(modules), infos);
+}
+
 ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
     const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
     const auto& qmd{kepler_compute.launch_description};
-    Environment env{kepler_compute, gpu_memory, program_base};
+    ComputeEnvironment env{kepler_compute, gpu_memory, program_base};
     if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
         // TODO: Load from cache
     }
-    const auto& float_control{device.FloatControlProperties()};
-    const Shader::Profile profile{
-        .unified_descriptor_binding = true,
-        .support_float_controls = true,
-        .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
-                                            VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
-        .support_separate_rounding_mode =
-            float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
-        .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
-        .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
-        .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
-        .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
-        .support_fp16_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
-        .support_fp32_signed_zero_nan_preserve =
-            float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
-        .has_broken_spirv_clamp = true, // TODO: is_intel
-    };
-    const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)};
+    flow_block_pool.ReleaseContents();
+    inst_pool.ReleaseContents();
+    block_pool.ReleaseContents();
+
+    Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start};
+    Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)};
+    u32 binding{0};
+    std::vector<u32> code{EmitSPIRV(profile, env, program, binding)};
     /*
     FILE* file = fopen("D:\\shader.spv", "wb");
     fwrite(code.data(), 4, code.size(), file);
     fclose(file);
     std::system("spirv-dis D:\\shader.spv");
     */
-    shader_info->unique_hash = env.ComputeHash();
-    shader_info->size_bytes = env.ShaderSize();
-    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
+    shader_info->unique_hash = env.CalculateHash();
+    shader_info->size_bytes = env.ReadSize();
+    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info,
                            BuildShader(device, code)};
 }
 
@@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_
     ShaderInfo shader;
     ComputePipeline pipeline{CreateComputePipeline(&shader)};
     const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
-    shader.compute_users.push_back(key);
-    pipeline.AddRef();
-
     const size_t size_bytes{shader.size_bytes};
     Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
     return &compute_cache.emplace(key, std::move(pipeline)).first->second;
@@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash)
     };
 }
 
-void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
-    for (const ComputePipelineCacheKey& key : shader->compute_users) {
-        const auto it = compute_cache.find(key);
-        ASSERT(it != compute_cache.end());
-
-        Pipeline& pipeline = it->second;
-        if (pipeline.RemoveRef()) {
-            // Wait for the pipeline to be free of GPU usage before destroying it
-            scheduler.Wait(pipeline.UsageTick());
-            compute_cache.erase(it);
-        }
-    }
-}
-
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eb35abc27..60fb976df 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -12,11 +12,18 @@
 #include <type_traits>
 #include <utility>
 
-#include <boost/functional/hash.hpp>
-
 #include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/shader_cache.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -26,13 +33,6 @@ class System;
 
 namespace Vulkan {
 
-class Device;
-class RasterizerVulkan;
-class ComputePipeline;
-class VKDescriptorPool;
-class VKScheduler;
-class VKUpdateDescriptorQueue;
-
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
@@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
 static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
 
+struct GraphicsPipelineCacheKey {
+    std::array<u128, 6> unique_hashes;
+    FixedPipelineState state;
+
+    size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    size_t Size() const noexcept {
+        return sizeof(unique_hashes) + state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
+
 } // namespace Vulkan
 
 namespace std {
@@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
     }
 };
 
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> {
+    size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+
 } // namespace std
 
 namespace Vulkan {
 
+class ComputePipeline;
+class Device;
+class RasterizerVulkan;
+class RenderPassCache;
+class VKDescriptorPool;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
 struct ShaderInfo {
     u128 unique_hash{};
     size_t size_bytes{};
-    std::vector<ComputePipelineCacheKey> compute_users;
 };
 
 class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
@@ -80,15 +114,23 @@ public:
                   Tegra::Engines::KeplerCompute& kepler_compute,
                   Tegra::MemoryManager& gpu_memory, const Device& device,
                   VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
-                  VKUpdateDescriptorQueue& update_descriptor_queue);
+                  VKUpdateDescriptorQueue& update_descriptor_queue,
+                  RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
+                  TextureCache& texture_cache);
     ~PipelineCache() override;
 
+    [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
+
     [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
-protected:
-    void OnShaderRemoval(ShaderInfo* shader) override;
-
 private:
+    bool RefreshStages();
+
+    const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr,
+                                     u32 start_address, VAddr cpu_addr);
+
+    GraphicsPipeline CreateGraphicsPipeline();
+
     ComputePipeline CreateComputePipeline(ShaderInfo* shader);
 
     ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
@@ -104,8 +146,20 @@ private:
     VKScheduler& scheduler;
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
+    RenderPassCache& render_pass_cache;
+    BufferCache& buffer_cache;
+    TextureCache& texture_cache;
+
+    GraphicsPipelineCacheKey graphics_key{};
 
     std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
+    std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache;
+
+    Shader::ObjectPool<Shader::IR::Inst> inst_pool;
+    Shader::ObjectPool<Shader::IR::Block> block_pool;
+    Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool;
+
+    Shader::Profile profile;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c94419d29..036b531b9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       blit_image(device, scheduler, state_tracker, descriptor_pool),
       astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
                         memory_allocator),
-      texture_cache_runtime{device, scheduler, memory_allocator,
-                            staging_pool, blit_image, astc_decoder_pass},
+      render_pass_cache(device), texture_cache_runtime{device, scheduler,
+                                                       memory_allocator, staging_pool,
+                                                       blit_image, astc_decoder_pass,
+                                                       render_pass_cache},
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            update_descriptor_queue, descriptor_pool),
       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
-                     descriptor_pool, update_descriptor_queue),
-      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+                     descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
+                     texture_cache),
+      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
       wfi_event(device.GetLogical().CreateEvent()) {
     scheduler.SetQueryCache(query_cache);
@@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
 RasterizerVulkan::~RasterizerVulkan() = default;
 
 void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
-    UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced);
+    MICROPROFILE_SCOPE(Vulkan_Drawing);
+
+    SCOPE_EXIT({ gpu.TickWork(); });
+    FlushWork();
+
+    query_cache.UpdateCounters();
+
+    GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
+    if (!pipeline) {
+        return;
+    }
+    update_descriptor_queue.Acquire();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+    pipeline->Configure(is_indexed);
+
+    BeginTransformFeedback();
+
+    scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
+    UpdateDynamicStates();
+
+    const auto& regs{maxwell3d.regs};
+    const u32 num_instances{maxwell3d.mme_draw.instance_count};
+    const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
+    scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
+        if (draw_params.is_indexed) {
+            cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
+                               draw_params.base_vertex,
draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
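The cache added below keys a VkRenderPass only on attachment pixel formats and sample count; everything else about the framebuffer may differ between users of the same pass. A minimal usage sketch under assumed values (the caller, `device`, and the chosen formats are illustrative, not part of this patch):

    // Hypothetical caller: one color target plus depth, unused slots left Invalid.
    RenderPassKey key{};
    key.color_formats.fill(VideoCore::Surface::PixelFormat::Invalid);
    key.color_formats[0] = VideoCore::Surface::PixelFormat::B8G8R8A8_UNORM;
    key.depth_format = VideoCore::Surface::PixelFormat::D32_FLOAT;
    key.samples = VK_SAMPLE_COUNT_1_BIT;

    RenderPassCache render_pass_cache{device};
    const VkRenderPass pass = render_pass_cache.Get(key); // first call builds the pass
    const VkRenderPass same = render_pass_cache.Get(key); // later calls are a map lookup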
+
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+namespace {
+using VideoCore::Surface::PixelFormat;
+
+constexpr std::array ATTACHMENT_REFERENCES{
+    VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
+    VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
+};
+
+VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
+                                              VkSampleCountFlagBits samples) {
+    using MaxwellToVK::SurfaceFormat;
+    return {
+        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
+        .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
+        .samples = samples,
+        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+    };
+}
+} // Anonymous namespace
+
+RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
+
+VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
+    const auto [pair, is_new] = cache.try_emplace(key);
+    if (!is_new) {
+        return *pair->second;
+    }
+    boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
+    u32 num_images{0};
+
+    for (size_t index = 0; index < key.color_formats.size(); ++index) {
+        const PixelFormat format{key.color_formats[index]};
+        if (format == PixelFormat::Invalid) {
+            continue;
+        }
+        descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+        ++num_images;
+    }
+    const size_t num_colors{descriptions.size()};
+    const VkAttachmentReference* depth_attachment{};
+    if (key.depth_format != PixelFormat::Invalid) {
+        depth_attachment = &ATTACHMENT_REFERENCES[num_colors];
+        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+    }
+    const VkSubpassDescription subpass{
+        .flags = 0,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .inputAttachmentCount = 0,
+        .pInputAttachments = nullptr,
+        .colorAttachmentCount = static_cast<u32>(num_colors),
+        .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
+        .pResolveAttachments = nullptr,
+        .pDepthStencilAttachment = depth_attachment,
+        .preserveAttachmentCount = 0,
+        .pPreserveAttachments = nullptr,
+    };
+    pair->second = device->GetLogical().CreateRenderPass({
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .attachmentCount = static_cast<u32>(descriptions.size()),
+        .pAttachments = descriptions.data(),
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+        .dependencyCount = 0,
+        .pDependencies = nullptr,
+    });
+    return *pair->second;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..db8e83f1a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,53 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "video_core/surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+struct RenderPassKey {
+    auto operator<=>(const RenderPassKey&) const noexcept = default;
+
+    std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
+    VideoCore::Surface::PixelFormat depth_format;
+    VkSampleCountFlagBits samples;
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::RenderPassKey> {
+    [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
+        size_t value = static_cast<size_t>(key.depth_format) << 48;
+        value ^= static_cast<size_t>(key.samples) << 52;
+        for (size_t i = 0; i < key.color_formats.size(); ++i) {
+            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
+        }
+        return value;
+    }
+};
+} // namespace std
+
+namespace Vulkan {
+
+class Device;
+
+class RenderPassCache {
+public:
+    explicit RenderPassCache(const Device& device_);
+
+    VkRenderPass Get(const RenderPassKey& key);
+
+private:
+    const Device* device{};
+    std::unordered_map<RenderPassKey, vk::RenderPass> cache;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f5..1bbc542a1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -18,6 +18,7 @@
+#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
 using VideoCore::Surface::IsPixelFormatASTC;
 
 namespace {
-
-constexpr std::array ATTACHMENT_REFERENCES{
-    VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
-    VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
-};
-
 constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     if (color == std::array{0, 0, 0, 0}) {
         return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
     }
 }
 
-[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
-                                                            const ImageView* image_view) {
-    using MaxwellToVK::SurfaceFormat;
-    const PixelFormat pixel_format = image_view->format;
-    return VkAttachmentDescription{
-        .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
-        .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
-        .samples = image_view->Samples(),
-        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
-        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-    };
-}
-
 [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
     switch (swizzle) {
     case SwizzleSource::Zero:
@@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
 
 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
-    std::vector<VkAttachmentDescription> descriptions;
     std::vector<VkImageView> attachments;
     RenderPassKey renderpass_key{};
     s32 num_layers = 1;
@@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
             continue;
         }
         attachments.push_back(color_buffer->RenderTarget());
-        descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
         renderpass_key.color_formats[index] = color_buffer->format;
         num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
     }
     if (depth_buffer) {
        attachments.push_back(depth_buffer->RenderTarget());
-        descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
         renderpass_key.depth_format = depth_buffer->format;
         num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
     renderpass_key.samples = samples;
 
-    const auto& device = runtime.device.GetLogical();
-    const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
-    if (is_new) {
-        const VkSubpassDescription subpass{
-            .flags = 0,
-            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-            .inputAttachmentCount = 0,
-            .pInputAttachments = nullptr,
-            .colorAttachmentCount = static_cast<u32>(num_colors),
-            .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
-            .pResolveAttachments = nullptr,
-            .pDepthStencilAttachment = depth_attachment,
-            .preserveAttachmentCount = 0,
-            .pPreserveAttachments = nullptr,
-        };
-        cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
-            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .attachmentCount = static_cast<u32>(descriptions.size()),
-            .pAttachments = descriptions.data(),
-            .subpassCount = 1,
-            .pSubpasses = &subpass,
-            .dependencyCount = 0,
-            .pDependencies = nullptr,
-        });
-    }
-    renderpass = *cache_pair->second;
+    renderpass = runtime.render_pass_cache.Get(renderpass_key);
+
     render_area = VkExtent2D{
         .width = key.size.width,
         .height = key.size.height,
     };
     num_color_buffers = static_cast<u32>(num_colors);
-    framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+    framebuffer = runtime.device.GetLogical().CreateFramebuffer({
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..189ee5a68 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -26,35 +26,10 @@ class Device;
 class Image;
 class ImageView;
 class Framebuffer;
+class RenderPassCache;
 class StagingBufferPool;
 class VKScheduler;
 
-struct RenderPassKey {
-    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-
-    std::array<PixelFormat, NUM_RT> color_formats;
-    PixelFormat depth_format;
-    VkSampleCountFlagBits samples;
-};
-
-} // namespace Vulkan
-
-namespace std {
-template <>
-struct hash<Vulkan::RenderPassKey> {
-    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
-        size_t value = static_cast<size_t>(key.depth_format) << 48;
-        value ^= static_cast<size_t>(key.samples) << 52;
-        for (size_t i = 0; i < key.color_formats.size(); ++i) {
-            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
-        }
-        return value;
-    }
-};
-} // namespace std
-
-namespace Vulkan {
-
 struct TextureCacheRuntime {
     const Device& device;
     VKScheduler& scheduler;
@@ -62,7 +37,7 @@ struct TextureCacheRuntime {
     StagingBufferPool& staging_buffer_pool;
     BlitImageHelper& blit_image_helper;
     ASTCDecoderPass& astc_decoder_pass;
-    std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
+    RenderPassCache& render_pass_cache;
 
     void Finish();
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 4887d6fd9..f0e5b098c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
     VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
     VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
+    VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
 #ifdef _WIN32
     VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
 #endif
@@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     };
     SetNext(next, host_query_reset);
 
+    VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
+        .pNext = nullptr,
+        .shaderDemoteToHelperInvocation = true,
+    };
+    SetNext(next, demote);
+
     VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
     if (is_float16_supported) {
         float16_int8 = {
@@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const {
             throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
         }
     }
+    VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
+    demote.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
+    demote.pNext = nullptr;
+
     VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
     robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+    robustness2.pNext = &demote;
 
     VkPhysicalDeviceFeatures2KHR features2{};
     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
+        std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
         std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
         std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
         std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),

From eeb1efa2d2947faed55340e8aec3a0187a3729a1 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 19 Mar 2021 20:28:26 -0300
Subject: [PATCH 071/770] shader: Implement LOP32I

---
 .../translate/impl/logic_operation.cpp        | 59 ++++++++++++++-----
 .../translate/impl/not_implemented.cpp        |  4 --
 2 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
index e786a388e..89e5cd6de 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -32,34 +32,51 @@ enum class LogicalOp : u64 {
     }
 }
 
-void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
+void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
+         LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
+         IR::Pred dest_pred = IR::Pred::PT) {
     union {
         u64 insn;
         BitField<0, 8, IR::Reg> dest_reg;
         BitField<8, 8, IR::Reg> src_reg;
-        BitField<39, 1, u64> neg_a;
-        BitField<40, 1, u64> neg_b;
-        BitField<41, 2, LogicalOp> bit_op;
-        BitField<43, 1, u64> x;
-        BitField<44, 2, PredicateOp> pred_op;
-        BitField<48, 3, IR::Pred> pred;
     } const lop{insn};
 
-    if (lop.x != 0) {
-        throw NotImplementedException("LOP X");
+    if (x) {
+        throw NotImplementedException("X");
+    }
+    if (cc) {
+        throw NotImplementedException("CC");
     }
     IR::U32 op_a{v.X(lop.src_reg)};
-    if (lop.neg_a != 0) {
+    if (inv_a != 0) {
        op_a = v.ir.BitwiseNot(op_a);
     }
-    if (lop.neg_b != 0) {
+    if (inv_b != 0) {
        op_b = v.ir.BitwiseNot(op_b);
     }
 
-    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)};
-    const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)};
+    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
+    if (pred_op) {
+        const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
+        v.ir.SetPred(dest_pred, pred_result);
+    }
     v.X(lop.dest_reg, result);
-    v.ir.SetPred(lop.pred, pred_result);
+}
+
+void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
+    union {
+        u64 insn;
+        BitField<39, 1, u64> inv_a;
+        BitField<40, 1, u64> inv_b;
+        BitField<41, 2, LogicalOp> bit_op;
+        BitField<43, 1, u64> x;
+        BitField<44, 2, PredicateOp> pred_op;
+        BitField<47, 1, u64> cc;
+        BitField<48, 3, IR::Pred> dest_pred;
    } const lop{insn};
+
+    LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
+        lop.pred_op, lop.dest_pred);
+}
 } // Anonymous namespace
 
@@ -74,4 +91,18 @@ void TranslatorVisitor::LOP_cbuf(u64 insn) {
     LOP(*this, insn, GetCbuf(insn));
 }
 
 void TranslatorVisitor::LOP_imm(u64 insn) {
     LOP(*this, insn, GetImm20(insn));
 }
+
+void TranslatorVisitor::LOP32I(u64 insn) {
+    union {
+        u64 raw;
+        BitField<53, 2, LogicalOp> bit_op;
+        BitField<57, 1, u64> x;
+        BitField<52, 1, u64> cc;
+        BitField<55, 1, u64> inv_a;
+        BitField<56, 1, u64> inv_b;
+    } const lop32i{insn};
+
+    LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
+        lop32i.inv_b != 0, lop32i.bit_op);
+}
 } // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 59252bcc5..a4367fc5a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -357,10 +357,6 @@ void TranslatorVisitor::LONGJMP(u64) {
     ThrowNotImplemented(Opcode::LONGJMP);
 }
 
-void TranslatorVisitor::LOP32I(u64) {
-    ThrowNotImplemented(Opcode::LOP32I);
-}
-
 void TranslatorVisitor::MEMBAR(u64) {
     ThrowNotImplemented(Opcode::MEMBAR);
 }

From c97d03efb9e02f89cca6dfea4c8d5c37fc4a2adc Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 19 Mar 2021 21:14:58 -0300
Subject: [PATCH 072/770] shader: Implement ISCADD (imm)

---
 .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
index f06046d4d..5469e445a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -57,8 +57,8 @@ void TranslatorVisitor::ISCADD_cbuf(u64) {
     throw NotImplementedException("ISCADD (cbuf)");
 }
 
-void TranslatorVisitor::ISCADD_imm(u64) {
-    throw NotImplementedException("ISCADD (imm)");
+void TranslatorVisitor::ISCADD_imm(u64 insn) {
+    ISCADD(*this, insn, GetImm20(insn));
 }
 
 void TranslatorVisitor::ISCADD32I(u64) {

From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 20 Mar 2021 05:04:12 -0300
Subject: [PATCH 073/770] shader: Implement I2F

---
 src/shader_recompiler/CMakeLists.txt          |   1 +
 .../backend/spirv/emit_context.cpp            |   2 +
 .../backend/spirv/emit_spirv.h                |  13 ++
 .../backend/spirv/emit_spirv_convert.cpp      |  48 +++++
 .../backend/spirv/emit_spirv_integer.cpp      |   4 +
 .../frontend/ir/ir_emitter.cpp                | 145 ++++++++++-----
 .../frontend/ir/ir_emitter.h                  |  14 +-
 src/shader_recompiler/frontend/ir/opcodes.inc |  13 ++
 .../frontend/maxwell/translate/impl/impl.cpp  |  21 +++
 .../frontend/maxwell/translate/impl/impl.h    |   2 +
 .../integer_floating_point_conversion.cpp     | 173 ++++++++++++++++++
 .../translate/impl/not_implemented.cpp        |  12 --
 .../maxwell/translate/impl/texture_fetch.cpp  |   2 +-
 .../translate/impl/texture_fetch_swizzled.cpp |   2 +-
 .../ir_opt/collect_shader_info_pass.cpp       |  28 +++
 .../ir_opt/lower_fp16_to_fp32.cpp             |  16 ++
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   3 +-
 17 files changed, 429 insertions(+), 70 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 31c394106..d0f0ec775 100644
--- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -84,6 +84,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp frontend/maxwell/translate/impl/integer_funnel_shift.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 6c79b611b..6c8f16562 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -89,6 +89,8 @@ Id EmitContext::Def(const IR::Value& value) { return value.U1() ? true_value : false_value; case IR::Type::U32: return Constant(U32[1], value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); case IR::Type::F32: return Constant(F32[1], value.F32()); case IR::Type::F64: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index ae121f534..1fe65f8a9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -243,6 +243,7 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); @@ -302,16 +303,28 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); Id EmitConvertF16S32(EmitContext& ctx, Id value); Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); Id EmitConvertF16U32(EmitContext& ctx, Id value); Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); Id EmitConvertF32S32(EmitContext& ctx, Id value); Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); Id EmitConvertF32U32(EmitContext& ctx, Id value); Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); Id EmitConvertF64S32(EmitContext& ctx, Id value); Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitBindlessImageSampleImplicitLod(EmitContext&); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 2aff673aa..757165626 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,6 +102,14 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + Id EmitConvertF16S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } @@ -110,6 +118,14 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + Id EmitConvertF16U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } @@ -118,6 +134,14 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } @@ -126,6 +150,14 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } @@ -134,6 +166,14 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } @@ -142,6 +182,14 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F64[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c9de204b0..a9c5e9cca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -70,6 +70,10 @@ Id EmitIAbs32(EmitContext& ctx, Id value) { return ctx.OpSAbs(ctx.U32[1], value); } +Id EmitIAbs64(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U64, value); +} + Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 672836c0b..652f6949e 100644 
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const {
     return U64{Value{value}};
 }
 
+U64 IREmitter::Imm64(s64 value) const {
+    return U64{Value{static_cast<u64>(value)}};
+}
+
 F64 IREmitter::Imm64(f64 value) const {
     return F64{Value{value}};
 }
@@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) {
 }
 
 F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
-    if (a.Type() != a.Type()) {
+    if (a.Type() != b.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
     }
     switch (a.Type()) {
@@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) {
     }
 }
 
-U32 IREmitter::IAbs(const U32& value) {
-    return Inst<U32>(Opcode::IAbs32, value);
+U32U64 IREmitter::IAbs(const U32U64& value) {
+    switch (value.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::IAbs32, value);
+    case Type::U64:
+        return Inst<U64>(Opcode::IAbs64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
@@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
     return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
 }
 
-U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
-    return Inst<U1>(Opcode::IEqual, lhs, rhs);
+U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(Opcode::IEqual, lhs, rhs);
+    case Type::U64: {
+        // Manually compare the unpacked values
+        const Value lhs_vector{UnpackUint2x32(lhs)};
+        const Value rhs_vector{UnpackUint2x32(rhs)};
+        return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
+                                 IR::U32{CompositeExtract(rhs_vector, 0)}),
+                          IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
+                                 IR::U32{CompositeExtract(rhs_vector, 1)}));
+    }
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
@@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
-F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
+    switch (dest_bitsize) {
     case 16:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F16>(Opcode::ConvertF16S8, value);
+        case 16:
+            return Inst<F16>(Opcode::ConvertF16S16, value);
+        case 32:
             return Inst<F16>(Opcode::ConvertF16S32, value);
-        case Type::U64:
+        case 64:
             return Inst<F16>(Opcode::ConvertF16S64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 32:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F32>(Opcode::ConvertF32S8, value);
+        case 16:
+            return Inst<F32>(Opcode::ConvertF32S16, value);
+        case 32:
             return Inst<F32>(Opcode::ConvertF32S32, value);
-        case Type::U64:
+        case 64:
             return Inst<F32>(Opcode::ConvertF32S64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 64:
-        switch (value.Type()) {
-        case Type::U32:
-            return Inst<F64>(Opcode::ConvertF64S32, value);
-        case Type::U64:
-            return Inst<F64>(Opcode::ConvertF64S64, value);
-        default:
-            ThrowInvalidType(value.Type());
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F64>(Opcode::ConvertF64S8, value);
+        case 16:
+            return Inst<F64>(Opcode::ConvertF64S16, value);
+        case 32:
+            return Inst<F64>(Opcode::ConvertF64S32, value);
+        case 64:
+            return Inst<F64>(Opcode::ConvertF64S64, value);
         }
-    default:
-        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+        break;
     }
+    throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
 }
 
-F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) {
+    switch (dest_bitsize) {
     case 16:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F16>(Opcode::ConvertF16U8, value);
+        case 16:
+            return Inst<F16>(Opcode::ConvertF16U16, value);
+        case 32:
             return Inst<F16>(Opcode::ConvertF16U32, value);
-        case Type::U64:
+        case 64:
             return Inst<F16>(Opcode::ConvertF16U64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 32:
-        switch (value.Type()) {
-        case Type::U32:
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F32>(Opcode::ConvertF32U8, value);
+        case 16:
+            return Inst<F32>(Opcode::ConvertF32U16, value);
+        case 32:
             return Inst<F32>(Opcode::ConvertF32U32, value);
-        case Type::U64:
+        case 64:
             return Inst<F32>(Opcode::ConvertF32U64, value);
-        default:
-            ThrowInvalidType(value.Type());
         }
+        break;
     case 64:
-        switch (value.Type()) {
-        case Type::U32:
-            return Inst<F64>(Opcode::ConvertF64U32, value);
-        case Type::U64:
-            return Inst<F64>(Opcode::ConvertF64U64, value);
-        default:
-            ThrowInvalidType(value.Type());
+        switch (src_bitsize) {
+        case 8:
+            return Inst<F64>(Opcode::ConvertF64U8, value);
+        case 16:
+            return Inst<F64>(Opcode::ConvertF64U16, value);
+        case 32:
+            return Inst<F64>(Opcode::ConvertF64U32, value);
+        case 64:
+            return Inst<F64>(Opcode::ConvertF64U64, value);
         }
-    default:
-        throw InvalidArgument("Invalid destination bitsize {}", bitsize);
+        break;
     }
+    throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
 }
 
-F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
+F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
+                                 const Value& value) {
     if (is_signed) {
-        return ConvertSToF(bitsize, value);
+        return ConvertSToF(dest_bitsize, src_bitsize, value);
     } else {
-        return ConvertUToF(bitsize, value);
+        return ConvertUToF(dest_bitsize, src_bitsize, value);
     }
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 72af5db37..8edb11154 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -29,6 +29,7 @@ public:
     [[nodiscard]] U32 Imm32(s32 value) const;
     [[nodiscard]] F32 Imm32(f32 value) const;
     [[nodiscard]] U64 Imm64(u64 value) const;
+    [[nodiscard]] U64 Imm64(s64 value) const;
     [[nodiscard]] F64 Imm64(f64 value) const;
 
     void Branch(Block* label);
@@ -170,7 +171,7 @@ public:
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
-    [[nodiscard]] U32 IAbs(const U32& value);
+    [[nodiscard]] U32U64 IAbs(const U32U64& value);
     [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
@@ -193,7 +194,7 @@ public:
     [[nodiscard]] U32 UMax(const U32& a, const U32& b);
 
     [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
-
[[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); @@ -207,9 +208,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 593faca52..8471db7b9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -247,6 +247,7 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) +OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) @@ -311,16 +312,28 @@ OPCODE(ConvertF16F32, F16, F32, OPCODE(ConvertF32F16, F32, F16, ) OPCODE(ConvertF32F64, F32, F64, ) OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) OPCODE(ConvertF16S32, F16, U32, ) OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) OPCODE(ConvertF16U32, F16, U32, ) OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) OPCODE(ConvertF32S32, F32, U32, ) OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U64, F64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index e444dcd4f..c9af83010 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); } +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto 
[binding, lower_offset]{CbufAddr(insn)};
+    const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
+    const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
+    const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
+    return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
+}
+
 IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
     union {
         u64 raw;
@@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
     return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
 }
 
+IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
+    const s64 value{GetImm20(insn).U32()};
+    return ir.Imm64(static_cast<s64>(static_cast<u64>(value) << 32));
+}
+
 IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
     union {
         u64 raw;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index ed81d9c36..cb66cca25 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -356,10 +356,12 @@ public:
     [[nodiscard]] IR::U32 GetCbuf(u64 insn);
     [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
+    [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm20(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
     [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
+    [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
 
     [[nodiscard]] IR::U32 GetImm32(u64 insn);
     [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..e8b5ae1d2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,173 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
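The SmallAbs helper in the file below computes a branch-free absolute value for sign-extended 8/16-bit operands: an arithmetic shift by bitsize - 1 yields an all-ones mask for negatives and zero otherwise, so (value + mask) ^ mask negates exactly the negative inputs, while the most negative value, which has no positive counterpart in the field width, is passed through unchanged by the select. A standalone sketch of the same identity (plain C++, not part of the patch; names are illustrative):

    #include <cstdint>

    // Branch-free |x| for a sign-extended 16-bit value, mirroring SmallAbs below.
    int32_t small_abs16(int32_t value) {
        const int32_t least = -(1 << 15); // -32768 has no 16-bit positive form
        const int32_t mask = value >> 15; // arithmetic shift: -1 if negative, else 0
        const int32_t absolute = (value + mask) ^ mask;
        return value == least ? value : absolute;
    }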
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class FloatFormat : u64 {
+    F16 = 1,
+    F32 = 2,
+    F64 = 3,
+};
+
+enum class IntFormat : u64 {
+    U8 = 0,
+    U16 = 1,
+    U32 = 2,
+    U64 = 3,
+};
+
+union Encoding {
+    u64 raw;
+    BitField<0, 8, IR::Reg> dest_reg;
+    BitField<8, 2, FloatFormat> float_format;
+    BitField<10, 2, IntFormat> int_format;
+    BitField<13, 1, u64> is_signed;
+    BitField<39, 2, FpRounding> fp_rounding;
+    BitField<41, 2, u64> selector;
+    BitField<47, 1, u64> cc;
+    BitField<45, 1, u64> neg;
+    BitField<49, 1, u64> abs;
+};
+
+bool Is64(u64 insn) {
+    return Encoding{insn}.int_format == IntFormat::U64;
+}
+
+int BitSize(FloatFormat format) {
+    switch (format) {
+    case FloatFormat::F16:
+        return 16;
+    case FloatFormat::F32:
+        return 32;
+    case FloatFormat::F64:
+        return 64;
+    }
+    throw NotImplementedException("Invalid float format {}", format);
+}
+
+IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
+    const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
+    const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
+    const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
+    const IR::U1 is_least{v.ir.IEqual(value, least_value)};
+    return IR::U32{v.ir.Select(is_least, value, absolute)};
+}
+
+void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
+    const Encoding i2f{insn};
+    if (i2f.cc != 0) {
+        throw NotImplementedException("CC");
+    }
+    const bool is_signed{i2f.is_signed != 0};
+    int src_bitsize{};
+    switch (i2f.int_format) {
+    case IntFormat::U8:
+        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+                                   v.ir.Imm32(8), is_signed);
+        if (i2f.abs != 0) {
+            src = SmallAbs(v, src, 8);
+        }
+        src_bitsize = 8;
+        break;
+    case IntFormat::U16:
+        if (i2f.selector == 1 || i2f.selector == 3) {
+            throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
+        }
+        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
+                                   v.ir.Imm32(16), is_signed);
+        if (i2f.abs != 0) {
+            src = SmallAbs(v, src, 16);
+        }
+        src_bitsize = 16;
+        break;
+    case IntFormat::U32:
+    case IntFormat::U64:
+        if (i2f.selector != 0) {
+            throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
+        }
+        if (i2f.abs != 0 && is_signed) {
+            src = v.ir.IAbs(src);
+        }
+        src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
+        break;
+    }
+    const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4367fc5a..4078feafa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) { ThrowNotImplemented(Opcode::HSETP2_imm); } -void TranslatorVisitor::I2F_reg(u64) { - ThrowNotImplemented(Opcode::I2F_reg); -} - -void TranslatorVisitor::I2F_cbuf(u64) { - ThrowNotImplemented(Opcode::I2F_cbuf); -} - -void TranslatorVisitor::I2F_imm(u64) { - ThrowNotImplemented(Opcode::I2F_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 0fbb87ec4..b691b4d1f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -56,7 +56,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + const auto read_array{[&]() -> IR::F32 { return
v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 54f0df754..d5fda20f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) { } IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { - return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); } IR::Value Sample(TranslatorVisitor& v, u64 insn) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fbbe28632..e72505d61 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU32F16: case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: info.uses_fp64 = true; break; default: @@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: info.uses_int8 = true; break; default: @@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F32: case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: info.uses_int16 = true; break; default: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 74acb8bb6..baa3d22df 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::Identity; case IR::Opcode::ConvertF16F32: return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return 
IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; default: return op; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c9da2080d..d1399a46d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, .support_float_controls = true, @@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } From e802512d8e49cc4a92c0c09fe023576c2a2ab3db Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 20 Mar 2021 21:22:21 +0100 Subject: [PATCH 074/770] shader: Refactor half floating instructions --- src/shader_recompiler/CMakeLists.txt | 2 + .../impl/half_floating_point_add.cpp | 60 +------------------ .../impl/half_floating_point_helper.cpp | 49 +++++++++++++++ .../impl/half_floating_point_helper.h | 31 ++++++++++ 4 files changed, 84 insertions(+), 58 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d0f0ec775..21c66ce13 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -78,6 +78,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.cpp + frontend/maxwell/translate/impl/half_floating_point_helper.h frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index c292d5e87..19e3401ca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -2,66 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" namespace Shader::Maxwell { namespace { -enum class Merge : u64 { - H1_H0, - F32, - MRG_H0, - MRG_H1, -}; - -enum class Swizzle : u64 { - H1_H0, - F32, - H0_H0, - H1_H1, -}; - -std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { - switch (swizzle) { - case Swizzle::H1_H0: { - const IR::Value vector{ir.UnpackFloat2x16(value)}; - return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; - } - case Swizzle::H0_H0: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; - return {scalar, scalar}; - } - case Swizzle::H1_H1: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; - return {scalar, scalar}; - } - case Swizzle::F32: { - const IR::F32 scalar{ir.BitCast(value)}; - return {scalar, scalar}; - } - } - throw InvalidArgument("Invalid swizzle {}", swizzle); -} - -IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, - Merge merge) { - switch (merge) { - case Merge::H1_H0: - return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); - case Merge::F32: - return ir.BitCast(ir.FPConvert(32, lhs)); - case Merge::MRG_H0: - case Merge::MRG_H1: { - const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); - } - } - throw InvalidArgument("Invalid merge {}", merge); -} void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { @@ -122,7 +66,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // Anonymous namespace +} // namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..930822092 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,49 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const IR::F16& insert{h0 ? lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..0933b595e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,31 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell From 76c8a962ac4eae77e71d66a72c448930240339f9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 19:11:56 -0300 Subject: [PATCH 075/770] spirv: Implement VertexId and InstanceId, refactor code --- .../backend/spirv/emit_context.cpp | 191 ++++++++++-------- .../backend/spirv/emit_context.h | 14 +- .../backend/spirv/emit_spirv.cpp | 107 +++++----- .../backend/spirv/emit_spirv.h | 4 +- .../spirv/emit_spirv_context_get_set.cpp | 16 ++ .../backend/spirv/emit_spirv_memory.cpp | 46 +++-- .../ir_opt/collect_shader_info_pass.cpp | 6 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 10 files changed, 244 insertions(+), 144 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 6c8f16562..4a4de3676 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -48,6 +48,25 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { } throw InvalidArgument("Invalid texture type {}", desc.type); } + +Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin, + spv::StorageClass storage_class) { + const Id pointer_type{ctx.TypePointer(storage_class, type)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + ctx.interfaces.push_back(id); + return id; +} + +Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); +} + +Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -144,59 +163,8 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const Info& info, Stage stage) { - const auto define{ - [this](Id type, std::optional builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, storage_class)}; - if (builtin) { - Decorate(id, spv::Decoration::BuiltIn, *builtin); - } - interfaces.push_back(id); - return id; - }}; - using namespace std::placeholders; - const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; - const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; - - if (info.uses_workgroup_id) { - workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); - } - if (info.uses_local_invocation_id) { - local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); - } - if (info.loads_position) { - const bool is_fragment{stage != Stage::Fragment}; - const 
spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = define_input(F32[4], built_in); - } - for (size_t i = 0; i < info.loads_generics.size(); ++i) { - if (info.loads_generics[i]) { - // FIXME: Declare size from input - input_generics[i] = define_input(F32[4], std::nullopt); - Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i)); - Name(input_generics[i], fmt::format("in_attr{}", i)); - } - } - if (info.stores_position) { - output_position = define_output(F32[4], spv::BuiltIn::Position); - } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (info.stores_generics[i]) { - output_generics[i] = define_output(F32[4], std::nullopt); - Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i)); - Name(output_generics[i], fmt::format("out_attr{}", i)); - } - } - if (stage == Stage::Fragment) { - for (size_t i = 0; i < 8; ++i) { - if (!info.stores_frag_color[i]) { - continue; - } - frag_color[i] = define_output(F32[4], std::nullopt); - Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i)); - Name(frag_color[i], fmt::format("frag_color{}", i)); - } - } + DefineInputs(info, stage); + DefineOutputs(info, stage); } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { @@ -225,33 +193,6 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { } } -void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, - u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; - uniform_types.*member_type = uniform_type; - - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", desc.index)); - for (size_t i = 0; i < desc.count; ++i) { - cbufs[desc.index + i].*member_type = id; - } - binding += desc.count; - } -} - void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { if (info.storage_buffers_descriptors.empty()) { return; @@ -311,4 +252,94 @@ void EmitContext::DefineLabels(IR::Program& program) { } } +void EmitContext::DefineInputs(const Info& info, Stage stage) { + if (info.uses_workgroup_id) { + workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); + } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ?
spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = DefineInput(*this, F32[4], built_in); + } + if (info.loads_instance_id) { + if (profile.support_vertex_instance_id) { + instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId); + } else { + instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance); + } + } + if (info.loads_vertex_id) { + if (profile.support_vertex_instance_id) { + vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId); + } else { + vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); + } + } + for (size_t index = 0; index < info.loads_generics.size(); ++index) { + if (!info.loads_generics[index]) { + continue; + } + // FIXME: Declare size from input + const Id id{DefineInput(*this, F32[4])}; + Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); + Name(id, fmt::format("in_attr{}", index)); + input_generics[index] = id; + } +} + +void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; + uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + cbufs[desc.index + i].*member_type = id; + } + binding += desc.count; + } +} + +void EmitContext::DefineOutputs(const Info& info, Stage stage) { + if (info.stores_position) { + output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = DefineOutput(*this, F32[4]); + Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = DefineOutput(*this, F32[4]); + Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 2d7961ac3..9b9e0d6b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,12 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id instance_id{}; + Id instance_index{}; + Id base_instance{}; + Id vertex_id{}; + Id
vertex_index{}; + Id base_vertex{}; Id input_position{}; std::array<Id, 32> input_generics{}; @@ -99,11 +105,15 @@ private: void DefineCommonConstants(); void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); - void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, - Id type, char type_char, u32 element_size); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); + + void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, + Id type, char type_char, u32 element_size); + + void DefineInputs(const Info& info, Stage stage); + void DefineOutputs(const Info& info, Stage stage); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index b8978b94a..efd0b70b7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -113,6 +113,43 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { } } +Id DefineMain(EmitContext& ctx, IR::Program& program) { + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + ctx.AddLabel(block->Definition()); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); + } + } + ctx.OpFunctionEnd(); + return main; +} + +void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array<u32, 3> workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); + } + ctx.AddEntryPoint(execution_model, main, "main", interfaces); +} + void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { if (!profile.support_float_controls) { @@ -173,6 +210,25 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } } +void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); +} + Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t
index) { // Phi nodes can have forward declarations, if an argument is not defined provide a forward // declaration of it. Invoke will take care of giving it the right definition when it's @@ -202,53 +258,10 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, u32& binding) { EmitContext ctx{profile, program, binding}; - const Id void_function{ctx.TypeFunction(ctx.void_id)}; - const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; - for (IR::Block* const block : program.blocks) { - ctx.AddLabel(block->Definition()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } - } - ctx.OpFunctionEnd(); - - const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); - spv::ExecutionModel execution_model{}; - switch (env.ShaderStage()) { - case Shader::Stage::Compute: { - const std::array<u32, 3> workgroup_size{env.WorkgroupSize()}; - execution_model = spv::ExecutionModel::GLCompute; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - break; - } - case Shader::Stage::VertexB: - execution_model = spv::ExecutionModel::Vertex; - break; - case Shader::Stage::Fragment: - execution_model = spv::ExecutionModel::Fragment; - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - break; - default: - throw NotImplementedException("Stage {}", env.ShaderStage()); - } - ctx.AddEntryPoint(execution_model, func, "main", interfaces); - - SetupDenormControl(profile, program, ctx, func); - const Info& info{program.info}; - if (info.uses_sampled_1d) { - ctx.AddCapability(spv::Capability::Sampled1D); - } - if (info.uses_sparse_residency) { - ctx.AddCapability(spv::Capability::SparseResidency); - } - if (info.uses_demote_to_helper_invocation) { - ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); - ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); - } - // TODO: Track this usage - ctx.AddCapability(spv::Capability::ImageGatherExtended); - + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(env, ctx, main); + SetupDenormControl(profile, program, ctx, main); + SetupCapabilities(profile, program.info, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 1fe65f8a9..e297a0e20 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -81,8 +81,8 @@ void EmitLoadStorageS8(EmitContext& ctx); void EmitLoadStorageU16(EmitContext& ctx); void EmitLoadStorageS16(EmitContext& ctx); Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage64(EmitContext& ctx); -void EmitLoadStorage128(EmitContext& ctx); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx); void EmitWriteStorageS8(EmitContext& ctx); void EmitWriteStorageU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 02d115740..052b84151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -19,6
+19,10 @@ Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionZ: case IR::Attribute::PositionW: return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + case IR::Attribute::InstanceId: + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + case IR::Attribute::VertexId: + return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); default: throw NotImplementedException("Read attribute {}", attr); } @@ -125,6 +129,18 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + if (!ctx.profile.support_vertex_instance_id) { + switch (attr) { + case IR::Attribute::InstanceId: + return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index), + ctx.OpLoad(ctx.U32[1], ctx.base_instance)); + case IR::Attribute::VertexId: + return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), + ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); + default: + break; + } + } return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 7d3efc741..088bd3059 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -7,8 +7,8 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { - -static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { +namespace { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; return ctx.Constant(ctx.U32[1], imm_offset); @@ -22,6 +22,32 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } +Id EmitLoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 num_components) { + // TODO: Support reinterpreting bindings, guaranteed to be aligned + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + std::array<Id, 4> components; + for (u32 element = 0; element < num_components; ++element) { + Id index{base_index}; + if (element > 0) { + index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); + } + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + components[element] = ctx.OpLoad(ctx.U32[1], pointer); + } + if (num_components == 1) { + return components[0]; + } else { + const std::span components_span(components.data(), num_components); + return ctx.OpCompositeConstruct(ctx.U32[num_components], components_span); + } +} +} // Anonymous namespace + void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -95,21 +121,15 @@ void EmitLoadStorageS16(EmitContext&) { } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - return ctx.OpLoad(ctx.U32[1], pointer); + return
EmitLoadStorage(ctx, binding, offset, 1); } -void EmitLoadStorage64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return EmitLoadStorage(ctx, binding, offset, 2); } -void EmitLoadStorage128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return EmitLoadStorage(ctx, binding, offset, 4); } void EmitWriteStorageU8(EmitContext&) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e72505d61..e7fa3fce0 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -38,6 +38,12 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.loads_position = true; break; + case IR::Attribute::InstanceId: + info.loads_instance_id = true; + break; + case IR::Attribute::VertexId: + info.loads_vertex_id = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c6a143598..770299524 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -8,6 +8,7 @@ namespace Shader { struct Profile { bool unified_descriptor_binding{}; + bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; bool support_separate_rounding_mode{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 6eff762e2..f97730b34 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -59,6 +59,8 @@ struct Info { std::array loads_generics{}; bool loads_position{}; + bool loads_instance_id{}; + bool loads_vertex_id{}; std::array stores_frag_color{}; bool stores_frag_depth{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1399a46d..90e1a30f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -230,6 +230,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, + .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, From 28dff6a6298b714019aa10a47f5a9e3f3f689067 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 01:03:21 +0100 Subject: [PATCH 076/770] shader: Implement HFMA2 --- src/shader_recompiler/CMakeLists.txt | 1 + ...half_floating_point_fused_multiply_add.cpp | 170 ++++++++++++++++++ .../impl/half_floating_point_helper.cpp | 13 ++ .../impl/half_floating_point_helper.h | 8 + .../translate/impl/not_implemented.cpp | 20 --- 5 files changed, 192 insertions(+), 20 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 21c66ce13..42be817ec 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -78,6 
+78,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp + frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/half_floating_point_helper.cpp frontend/maxwell/translate/impl/half_floating_point_helper.h frontend/maxwell/translate/impl/impl.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..2f3996274 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,170 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
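+ // When FMZ does apply, a lane's result collapses to the addend whenever either factor is zero; as a behavioural sketch of the rule the Select below models (an assumption about the hardware, not patch code): fma_fmz(NaN, 0.0f, c) == c and fma_fmz(Inf, 0.0f, c) == c.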
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} + +} // namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ?
1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast<u32>(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index 930822092..d0c6ba1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -6,6 +6,19 @@ namespace Shader::Maxwell { +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + std::pair<IR::F16, IR::F16> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { switch (swizzle) { case Swizzle::H1_H0: { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index f26ef0949..24063b2ab 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -23,6 +23,14 @@ enum class Swizzle : u64 { H1_H1, }; +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + std::pair<IR::F16, IR::F16> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, Merge merge); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4078feafa..ddfca8d7a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,26 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HFMA2_reg(u64) { - ThrowNotImplemented(Opcode::HFMA2_reg); -} - -void TranslatorVisitor::HFMA2_rc(u64) { - ThrowNotImplemented(Opcode::HFMA2_rc); -} - -void TranslatorVisitor::HFMA2_cr(u64) { - ThrowNotImplemented(Opcode::HFMA2_cr); -} - -void TranslatorVisitor::HFMA2_imm(u64) { - ThrowNotImplemented(Opcode::HFMA2_imm); -} - -void TranslatorVisitor::HFMA2_32I(u64) { - ThrowNotImplemented(Opcode::HFMA2_32I); -} - void TranslatorVisitor::HMUL2_reg(u64) { ThrowNotImplemented(Opcode::HMUL2_reg); } From ed6cd3c94ac10b434a1240fc3cbed2050766be65 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 02:08:04 +0100 Subject: [PATCH 077/770] shader: Implement HMUL2 --- src/shader_recompiler/CMakeLists.txt | 1 + .../impl/half_floating_point_multiply.cpp |
143 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 16 -- 3 files changed, 144 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 42be817ec..24a2d61e0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -81,6 +81,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/half_floating_point_helper.cpp frontend/maxwell/translate/impl/half_floating_point_helper.h + frontend/maxwell/translate/impl/half_floating_point_multiply.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..ff34a8c8f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. 
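+ // When FMZ does apply, a lane's result collapses to +0.0f whenever either factor is zero; as a behavioural sketch of the rule the Select below models (an assumption about the hardware, not patch code): mul_fmz(NaN, 0.0f) == 0.0f and mul_fmz(Inf, 0.0f) == 0.0f.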
+ // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + Swizzle::F32, GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ?
1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast<u32>(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ddfca8d7a..6c159301f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,22 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HMUL2_reg(u64) { - ThrowNotImplemented(Opcode::HMUL2_reg); -} - -void TranslatorVisitor::HMUL2_cbuf(u64) { - ThrowNotImplemented(Opcode::HMUL2_cbuf); -} - -void TranslatorVisitor::HMUL2_imm(u64) { - ThrowNotImplemented(Opcode::HMUL2_imm); -} - -void TranslatorVisitor::HMUL2_32I(u64) { - ThrowNotImplemented(Opcode::HMUL2_32I); -} - void TranslatorVisitor::HSET2_reg(u64) { ThrowNotImplemented(Opcode::HSET2_reg); } From 9e213fd861d264cf79d7a6ed0268a57c87306b9b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 03:48:40 +0100 Subject: [PATCH 078/770] shader: Implement HSET2 --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/maxwell.inc | 4 +- .../impl/half_floating_point_helper.h | 1 + .../impl/half_floating_point_set.cpp | 115 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 -- 5 files changed, 119 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 24a2d61e0..eebca982b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -82,6 +82,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/half_floating_point_helper.cpp frontend/maxwell/translate/impl/half_floating_point_helper.h frontend/maxwell/translate/impl/half_floating_point_multiply.cpp + frontend/maxwell/translate/impl/half_floating_point_set.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index f2a2ff331..1b87d04fc 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -105,8 +105,8 @@ INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") -INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----") -INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1---
----") INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index f26ef0949..24063b2ab 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -5,6 +5,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..4825ca06a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,115 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + //if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 
+ const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
+ const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)};
+ const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
+ const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; +
+ v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace +
+void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz;
+ BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn};
+ HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
+ hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} +
+void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf;
+ BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op;
+ } const hset2{insn}; +
+ HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
+ hset2.compare_op, Swizzle::F32); +} +
+void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf;
+ BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low;
+ } const hset2{insn}; +
+ const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; +
+ HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false,
+ hset2.compare_op, Swizzle::H1_H0); +} +
+} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6c159301f..d1aeceef1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSET2_reg(u64) { - ThrowNotImplemented(Opcode::HSET2_reg); -} - -void TranslatorVisitor::HSET2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSET2_cbuf); -} - -void TranslatorVisitor::HSET2_imm(u64) { - ThrowNotImplemented(Opcode::HSET2_imm); -} - void TranslatorVisitor::HSETP2_reg(u64) { ThrowNotImplemented(Opcode::HSETP2_reg); } From e10d9c1b8e21912d34c02a22b5812b94fc27502b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:24:30 +0100 Subject: [PATCH 079/770] shader: Implement HSETP2 --- src/shader_recompiler/CMakeLists.txt | 1 + .../half_floating_point_set_predicate.cpp | 116 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 -- 3 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index eebca982b..87654931b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -82,6 +82,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/half_floating_point_helper.cpp
frontend/maxwell/translate/impl/half_floating_point_helper.h frontend/maxwell/translate/impl/half_floating_point_multiply.cpp + frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_set.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..6b1ac21d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + // if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{hsetp2.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; +
+ IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + }
+ const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
+ const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
+ const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
+ const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; +
+ if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
+ v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
+ } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +}
+} // Anonymous namespace +
+void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b;
+ BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op;
+ BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn};
+ HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
+ hsetp2.compare_op, hsetp2.h_and != 0); +} +
+void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op;
+ } const hsetp2{insn}; +
+ HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
+ hsetp2.compare_op, hsetp2.h_and != 0); +} +
+void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and;
+ BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high;
+ BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low;
+ } const hsetp2{insn}; +
+ const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
+ static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ?
1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d1aeceef1..bd3c1f9d6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSETP2_reg(u64) { - ThrowNotImplemented(Opcode::HSETP2_reg); -} - -void TranslatorVisitor::HSETP2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSETP2_cbuf); -} - -void TranslatorVisitor::HSETP2_imm(u64) { - ThrowNotImplemented(Opcode::HSETP2_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } From 27fb97377eeb40849260ea866a90519521c6f59b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:33:19 +0100 Subject: [PATCH 080/770] shader: Fix floating point comparison for FP16 --- .../maxwell/translate/impl/common_funcs.cpp | 2 +- .../maxwell/translate/impl/common_funcs.h | 4 +-- .../impl/half_floating_point_set.cpp | 34 +++++++++---------- .../half_floating_point_set_predicate.cpp | 20 +++++------ .../ir_opt/lower_fp16_to_fp32.cpp | 24 +++++++++++++ 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index af9a8f82c..d30e82b10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,7 +72,7 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f8add3c34..f584060b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -18,7 +18,7 @@ namespace Shader::Maxwell { [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); -[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, - const IR::F32& operand_2, FPCompareOp compare_op, +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 4825ca06a..1d28c0531 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f auto [lhs_a, 
rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - //if (lhs_a.Type() != lhs_b.Type()) { + + if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() == IR::Type::F16) { lhs_a = v.ir.FPConvert(32, lhs_a); rhs_a = v.ir.FPConvert(32, rhs_a); @@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f lhs_b = v.ir.FPConvert(32, lhs_b); rhs_b = v.ir.FPConvert(32, rhs_b); } - //} + } lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); @@ -94,22 +94,22 @@ void TranslatorVisitor::HSET2_cbuf(u64 insn) { } void TranslatorVisitor::HSET2_imm(u64 insn) { - union { - u64 insn; - BitField<53, 1, u64> bf; - BitField<54, 1, u64> ftz; - BitField<49, 4, FPCompareOp> compare_op; - BitField<56, 1, u64> neg_high; - BitField<30, 9, u64> high; - BitField<29, 1, u64> neg_low; - BitField<20, 9, u64> low; - } const hset2{insn}; + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; - const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; - HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, - hset2.compare_op, Swizzle::H1_H0); + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 6b1ac21d5..3e2a23c92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -24,17 +24,17 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - // if (lhs_a.Type() != lhs_b.Type()) { + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } } - if (lhs_b.Type() == IR::Type::F16) { - lhs_b = v.ir.FPConvert(32, lhs_b); - rhs_b = v.ir.FPConvert(32, rhs_b); - } - //} lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index baa3d22df..7723c9a57 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++
b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -50,6 +50,30 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeInsertF32x3; case IR::Opcode::CompositeInsertF16x4: return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual16: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual16: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual16: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual16: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan16: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan16: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan16: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan16: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual16: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual16: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual16: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual16: + return IR::Opcode::FPUnordGreaterThanEqual32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: From a77e764726938a26803fa90a9c69ccdd32ab09cd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 00:42:56 -0300 Subject: [PATCH 081/770] shader: Add support for fp16 comparisons and misc fixes --- .../backend/spirv/emit_spirv.h | 2 ++ .../spirv/emit_spirv_floating_point.cpp | 8 +++++++ .../frontend/ir/ir_emitter.cpp | 23 +++++++++++++++---- .../frontend/ir/ir_emitter.h | 6 ++--- src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../impl/half_floating_point_add.cpp | 3 +-- ...half_floating_point_fused_multiply_add.cpp | 4 +--- .../impl/half_floating_point_multiply.cpp | 3 +-- .../impl/half_floating_point_set.cpp | 1 + .../ir_opt/collect_shader_info_pass.cpp | 16 +++++++++++++ .../ir_opt/lower_fp16_to_fp32.cpp | 2 ++ 11 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index e297a0e20..486ef10a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -234,7 +234,9 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e635b1ffb..1fdf66cb6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -346,8 +346,16 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); } +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + Id EmitFPIsNan32(EmitContext& ctx, Id value) { return ctx.OpIsNan(ctx.U1, 
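value); }
+// Editor's note, not part of the original patch: OpIsNan infers the operand width from
+// its input, so the 16-, 32- and 64-bit emitters share an identical body; separate IR
+// opcodes exist so passes such as lower_fp16_to_fp32 can rewrite them per width.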
+Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} +
} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC } } -U1 IREmitter::FPIsNan(const F32& value) { - return Inst<U1>(Opcode::FPIsNan32, value); +U1 IREmitter::FPIsNan(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst<U1>(Opcode::FPIsNan16, value); + case Type::F32: + return Inst<U1>(Opcode::FPIsNan32, value); + case Type::F64: + return Inst<U1>(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } } -U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); } -U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -161,9 +161,9 @@ public: FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, bool ordered = true); - [[nodiscard]] U1 FPIsNan(const F32& value); - [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); - [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPIsNan(const F16F32F64& value); + [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan16, U1, F16, ) OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace
{ - void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { union { @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, HalfPrecision precision) { @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat, precision); } - -} // namespace +} // Anonymous namespace void TranslatorVisitor::HFMA2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, HalfPrecision precision) { @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, hmul2.precision); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HMUL2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { BitField<35, 4, FPCompareOp> compare_op; BitField<28, 2, Swizzle> swizzle_b; } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e7fa3fce0..fd6069c65 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -74,6 +74,9 @@ void 
VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::CompositeInsertF16x2: + case IR::Opcode::CompositeInsertF16x3: + case IR::Opcode::CompositeInsertF16x4: case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: @@ -103,6 +106,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: + case IR::Opcode::FPOrdEqual16: + case IR::Opcode::FPUnordEqual16: + case IR::Opcode::FPOrdNotEqual16: + case IR::Opcode::FPUnordNotEqual16: + case IR::Opcode::FPOrdLessThan16: + case IR::Opcode::FPUnordLessThan16: + case IR::Opcode::FPOrdGreaterThan16: + case IR::Opcode::FPUnordGreaterThan16: + case IR::Opcode::FPOrdLessThanEqual16: + case IR::Opcode::FPUnordLessThanEqual16: + case IR::Opcode::FPOrdGreaterThanEqual16: + case IR::Opcode::FPUnordGreaterThanEqual16: + case IR::Opcode::FPIsNan16: info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 7723c9a57..0e8862f45 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -74,6 +74,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPOrdGreaterThanEqual32; case IR::Opcode::FPUnordGreaterThanEqual16: return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan16: + return IR::Opcode::FPIsNan32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: From 112b8f00f0da0e031bb62a7a7a44469d3a5518a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 01:32:02 -0400 Subject: [PATCH 082/770] shader: Add FP64 register load/store helpers --- .../maxwell/translate/impl/double_add.cpp | 16 ++--------- .../frontend/maxwell/translate/impl/impl.cpp | 27 ++++++++++++++----- .../frontend/maxwell/translate/impl/impl.h | 2 ++ 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index bece191d7..3db09d0c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -22,19 +22,11 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<48, 1, u64> neg_a; BitField<49, 1, u64> abs_b; } const dadd{insn}; - - if (!IR::IsAligned(dadd.dest_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); - } - if (!IR::IsAligned(dadd.src_a_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); - } if (dadd.cc != 0) { throw NotImplementedException("DADD CC"); } - const IR::Reg reg_a{dadd.src_a_reg}; - const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 src_a{v.D(dadd.src_a_reg)}; const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; @@ -43,12 +35,8 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, }; - const IR::F64 
value{v.ir.FPAdd(op_a, op_b, control)}; - const IR::Value result{v.ir.UnpackDouble2x32(value)}; - for (int i = 0; i < 2; i++) { - v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); - } + v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index c9af83010..2d2f6f9c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -25,6 +25,13 @@ IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast<IR::F32>(X(reg)); } +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } @@ -33,6 +40,16 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast<IR::U32>(value)); } +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + IR::U32 TranslatorVisitor::GetReg8(u64 insn) { union { u64 raw; @@ -68,13 +85,9 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { union { u64 raw; - BitField<20, 8, IR::Reg> src; - } const index{insn}; - const IR::Reg reg{index.src}; - if (!IR::IsAligned(reg, 2)) { - throw NotImplementedException("Unaligned source register {}", reg); - } - return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); } static std::pair CbufAddr(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index cb66cca25..1a1073fa7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -342,9 +342,11 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); From c858b8ba97d3ff79dcff0795c1184ee356f2cd1a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 02:09:14 -0400 Subject: [PATCH 083/770] shader: Implement DMUL and DFMA Also add a missing const on DADD --- src/shader_recompiler/CMakeLists.txt | 2 + .../frontend/maxwell/maxwell.inc | 2 +- .../maxwell/translate/impl/double_add.cpp | 2 +- .../impl/double_fused_multiply_add.cpp | 53 +++++++++++++++++++ .../translate/impl/double_multiply.cpp | 45 ++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 8 +++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../translate/impl/not_implemented.cpp | 28 ---------- 8 files changed, 111 insertions(+), 30 deletions(-) create mode 100644
src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 87654931b..fb5d60334 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/double_fused_multiply_add.cpp + frontend/maxwell/translate/impl/double_multiply.cpp frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1b87d04fc..1dfaeb92f 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -35,7 +35,7 @@ INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") -INST(DFMA_cr, "DFMA (cr)", "0010 1011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index 3db09d0c2..ac1433dea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -30,7 +30,7 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; - IR::FpControl control{ + const IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..ff7321862 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
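+// Editor's note, not part of the original patch: DFMA moves fp64 operands through the
+// aligned register pairs introduced by the D() helpers of the previous commit. Roughly,
+// assuming the helper names from impl.cpp:
+//   IR::F64 src{v.D(reg)};   // packs X(reg) and X(reg + 1) with PackDouble2x32
+//   v.D(dest, result);       // unpacks with UnpackDouble2x32, writes dest and dest + 1
+// Odd (unaligned) register indices make the helpers throw NotImplementedException.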
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dfma.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..3e83d1c95 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dmul.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 2d2f6f9c6..758a0230a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -90,6 +90,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { return D(reg.index); } +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 1a1073fa7..c994fe803 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -354,6 +354,7 @@ public: [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd3c1f9d6..4e069912a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,22 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DFMA_reg(u64) { - ThrowNotImplemented(Opcode::DFMA_reg); -} - -void TranslatorVisitor::DFMA_rc(u64) { - ThrowNotImplemented(Opcode::DFMA_rc); -} - -void TranslatorVisitor::DFMA_cr(u64) { - ThrowNotImplemented(Opcode::DFMA_cr); -} - -void TranslatorVisitor::DFMA_imm(u64) { - ThrowNotImplemented(Opcode::DFMA_imm); -} - void TranslatorVisitor::DMNMX_reg(u64) { ThrowNotImplemented(Opcode::DMNMX_reg); } @@ -109,18 +93,6 @@ void TranslatorVisitor::DMNMX_imm(u64) { ThrowNotImplemented(Opcode::DMNMX_imm); } -void TranslatorVisitor::DMUL_reg(u64) { - ThrowNotImplemented(Opcode::DMUL_reg); -} - -void TranslatorVisitor::DMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::DMUL_cbuf); -} - -void 
TranslatorVisitor::DMUL_imm(u64) { - ThrowNotImplemented(Opcode::DMUL_imm); -} - void TranslatorVisitor::DSET_reg(u64) { ThrowNotImplemented(Opcode::DSET_reg); } From 8b3b9c3371626c217a3865adae26191fce31ccce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 17:27:44 -0300 Subject: [PATCH 084/770] shader: Add missing fp64 usage flags --- .../ir_opt/collect_shader_info_pass.cpp | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fd6069c65..2a66403de 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -121,6 +121,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPIsNan16: info.uses_fp16 = true; break; + case IR::Opcode::CompositeConstructF64x2: + case IR::Opcode::CompositeConstructF64x3: + case IR::Opcode::CompositeConstructF64x4: + case IR::Opcode::CompositeExtractF64x2: + case IR::Opcode::CompositeExtractF64x3: + case IR::Opcode::CompositeExtractF64x4: + case IR::Opcode::CompositeInsertF64x2: + case IR::Opcode::CompositeInsertF64x3: + case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF64U64: + case IR::Opcode::PackDouble2x32: + case IR::Opcode::UnpackDouble2x32: case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: case IR::Opcode::FPCeil64: @@ -135,6 +148,27 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::FPOrdEqual64: + case IR::Opcode::FPUnordEqual64: + case IR::Opcode::FPOrdNotEqual64: + case IR::Opcode::FPUnordNotEqual64: + case IR::Opcode::FPOrdLessThan64: + case IR::Opcode::FPUnordLessThan64: + case IR::Opcode::FPOrdGreaterThan64: + case IR::Opcode::FPUnordGreaterThan64: + case IR::Opcode::FPOrdLessThanEqual64: + case IR::Opcode::FPUnordLessThanEqual64: + case IR::Opcode::FPOrdGreaterThanEqual64: + case IR::Opcode::FPUnordGreaterThanEqual64: + case IR::Opcode::FPIsNan64: + case IR::Opcode::ConvertS16F64: + case IR::Opcode::ConvertS32F64: + case IR::Opcode::ConvertS64F64: + case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertU32F64: + case IR::Opcode::ConvertU64F64: + case IR::Opcode::ConvertF32F64: + case IR::Opcode::ConvertF64F32: case IR::Opcode::ConvertF64S8: case IR::Opcode::ConvertF64S16: case IR::Opcode::ConvertF64S32: From a62f04efab4331eeabd4441962f86a5e87db3f2d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 09:32:16 +0100 Subject: [PATCH 085/770] shader: Implement F2F --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/ir/ir_emitter.cpp | 12 +- .../frontend/ir/ir_emitter.h | 3 +- ...oating_point_conversion_floating_point.cpp | 180 ++++++++++++++++++ .../translate/impl/not_implemented.cpp | 12 -- .../ir_opt/collect_shader_info_pass.cpp | 4 +- 6 files changed, 192 insertions(+), 20 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fb5d60334..63ba1c75f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -72,6 +72,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp 
frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp + frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_min_max.cpp diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1eda95071..00c909f3e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1361,7 +1361,7 @@ U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } -F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { switch (result_bitsize) { case 16: switch (value.Type()) { @@ -1369,7 +1369,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { // Nothing to do return value; case Type::F32: - return Inst<F16>(Opcode::ConvertF16F32, value); + return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); case Type::F64: throw LogicError("Illegal conversion from F64 to F16"); default: break; } @@ -1379,12 +1379,12 @@ case 32: switch (value.Type()) { case Type::F16: - return Inst<F32>(Opcode::ConvertF32F16, value); + return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); case Type::F32: // Nothing to do return value; case Type::F64: - return Inst<F32>(Opcode::ConvertF32F64, value); + return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); default: break; } @@ -1394,10 +1394,10 @@ case Type::F16: throw LogicError("Illegal conversion from F16 to F64"); case Type::F32: + return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); + case Type::F64: // Nothing to do return value; - case Type::F64: - return Inst<F32>(Opcode::ConvertF32F64, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ab4537d88..346cef3ab 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -216,7 +216,8 @@ public: const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, + FpControl control = {}); [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, const Value& offset, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..1e366fde0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
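+// Editor's note, not part of the original patch: F2F doubles as a rounding instruction.
+// When the source and destination sizes differ it lowers to a plain FPConvert; when they
+// match, bits 39-42 masked with 0x0B select a rounding op (8 round, 9 floor, 10 ceil,
+// 11 trunc), and the pass-through cases add +0.0 so source NaNs are still canonicalized.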
+ +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast(rounding_op.Value() & rounding_mask); + } + } const f2f{insn}; + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? 
rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break;
+ case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default:
+ throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + }
+ F2F(*this, insn, src_a, f2f.abs != 0); +} +
+void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + throw NotImplementedException("Instruction"); +} +
+} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4e069912a..08f6eb788 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -117,18 +117,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::F2F_reg(u64) { - ThrowNotImplemented(Opcode::F2F_reg); -} - -void TranslatorVisitor::F2F_cbuf(u64) { - ThrowNotImplemented(Opcode::F2F_cbuf); -} - -void TranslatorVisitor::F2F_imm(u64) { - ThrowNotImplemented(Opcode::F2F_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 2a66403de..e9f64cf3f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -404,7 +404,9 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { case IR::Opcode::FPOrdLessThanEqual32: case IR::Opcode::FPUnordLessThanEqual32: case IR::Opcode::FPOrdGreaterThanEqual32: - case IR::Opcode::FPUnordGreaterThanEqual32: { + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: { const auto control{inst.Flags<IR::FpControl>()}; switch (control.fmz_mode) { case IR::FmzMode::DontCare: From 56be556eee65335cdc896bb1eb47999d04850b77 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 18:23:16 +0100 Subject: [PATCH 086/770] shader: Implement FADD32I --- .../translate/impl/floating_point_add.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 76a807d4e..487198aa6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -64,8 +64,21 @@ void TranslatorVisitor::FADD_imm(u64 insn) { FADD(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FADD32I(u64) { - throw NotImplementedException("FADD32I"); +void TranslatorVisitor::FADD32I(u64 insn) { +
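// Editor's note, not part of the original patch: unlike FADD_imm, which expands a
+ // 20-bit immediate, FADD32I carries a full 32-bit float immediate (GetFloatImm32),
+ // decodes its modifier flags from different bit positions, and always rounds to RN.
+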
union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<53, 1, u64> neg_b; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd32i{insn}; + + FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, + GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, + fadd32i.neg_b != 0); } } // namespace Shader::Maxwell From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: [PATCH 087/770] shader: Implement DMNMX, DSET, DSETP --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_spirv.cpp | 39 ++++++------ .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_select.cpp | 4 ++ .../frontend/ir/ir_emitter.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/double_compare_and_set.cpp | 59 +++++++++++++++++++ .../maxwell/translate/impl/double_min_max.cpp | 50 ++++++++++++++++ .../translate/impl/double_set_predicate.cpp | 54 +++++++++++++++++ .../translate/impl/floating_point_min_max.cpp | 2 +- .../translate/impl/not_implemented.cpp | 36 ----------- .../ir_opt/collect_shader_info_pass.cpp | 1 + .../ir_opt/constant_propagation_pass.cpp | 10 +++- src/shader_recompiler/profile.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + 16 files changed, 210 insertions(+), 59 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 63ba1c75f..23cb523a8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,8 +65,11 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/double_compare_and_set.cpp frontend/maxwell/translate/impl/double_fused_multiply_add.cpp + frontend/maxwell/translate/impl/double_min_max.cpp frontend/maxwell/translate/impl/double_multiply.cpp + frontend/maxwell/translate/impl/double_set_predicate.cpp frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index efd0b70b7..93e851133 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -152,24 +152,7 @@ void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { - if (!profile.support_float_controls) { - return; - } const Info& info{program.info}; - if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && - !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { - return; - } - ctx.AddExtension("SPV_KHR_float_controls"); - - if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { - 
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); - } - if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); - } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { @@ -210,6 +193,22 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } } +void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, + EmitContext& ctx, Id main_func) { + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + } + if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); + } +} + void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); @@ -260,7 +259,11 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(env, ctx, main); - SetupDenormControl(profile, program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } SetupCapabilities(profile, program.info, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 486ef10a7..960d022ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -136,6 +136,7 @@ Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0ae127d50..8b0562da5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -35,4 +35,8 @@ Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); } +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + 
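+// All of the EmitSelect* helpers lower to a single OpSelect and differ only
+// in their result type; F64 selection needs no extra logic because OpSelect
+// is defined for any scalar result type.
+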
} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 00c909f3e..432dd29a5 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -529,6 +529,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu return Inst(Opcode::SelectU64, condition, true_value, false_value); case Type::F32: return Inst(Opcode::SelectF32, condition, true_value, false_value); + case Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 884eea7a8..bdc07b9a7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -131,6 +131,7 @@ OPCODE(SelectU32, U32, U1, OPCODE(SelectU64, U64, U1, U64, U64, ) OPCODE(SelectF16, F16, U1, F16, F16, ) OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1dfaeb92f..c6cd2a79b 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -37,8 +37,8 @@ INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") -INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") -INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..e2ec852c9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
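+
+// DSET compares two double-precision operands, combines the comparison with
+// a source predicate through the boolean op, and writes either an all-ones
+// integer mask or, when the `bf` bit is set, the float constant 1.0f.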
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; + + v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..55a224db3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index c3180a9bd..343d91032 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -24,7 +24,7 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; - const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ .no_contraction{false}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 08f6eb788..27b12ff3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,42 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DMNMX_reg(u64) { - ThrowNotImplemented(Opcode::DMNMX_reg); -} - -void TranslatorVisitor::DMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::DMNMX_cbuf); -} - -void TranslatorVisitor::DMNMX_imm(u64) { - ThrowNotImplemented(Opcode::DMNMX_imm); -} - -void TranslatorVisitor::DSET_reg(u64) { - ThrowNotImplemented(Opcode::DSET_reg); -} - -void TranslatorVisitor::DSET_cbuf(u64) { - ThrowNotImplemented(Opcode::DSET_cbuf); -} - -void TranslatorVisitor::DSET_imm(u64) { - 
ThrowNotImplemented(Opcode::DSET_imm); -} - -void TranslatorVisitor::DSETP_reg(u64) { - ThrowNotImplemented(Opcode::DSETP_reg); -} - -void TranslatorVisitor::DSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::DSETP_cbuf); -} - -void TranslatorVisitor::DSETP_imm(u64) { - ThrowNotImplemented(Opcode::DSETP_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e9f64cf3f..f44eac5d8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -130,6 +130,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeInsertF64x2: case IR::Opcode::CompositeInsertF64x3: case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::SelectF64: case IR::Opcode::BitCastU64F64: case IR::Opcode::BitCastF64U64: case IR::Opcode::PackDouble2x32: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index a39db2bf1..ef7766d22 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -229,7 +229,6 @@ void FoldISub32(IR::Inst& inst) { } } -template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -340,8 +339,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); + case IR::Opcode::SelectU1: + case IR::Opcode::SelectU8: + case IR::Opcode::SelectU16: case IR::Opcode::SelectU32: - return FoldSelect(inst); + case IR::Opcode::SelectU64: + case IR::Opcode::SelectF16: + case IR::Opcode::SelectF32: + case IR::Opcode::SelectF64: + return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); case IR::Opcode::LogicalOr: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 770299524..3181c79fb 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,7 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; + bool support_fp64_signed_zero_nan_preserve{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 90e1a30f6..75f7c1e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -244,6 +244,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } From 96b7ced6ec32ccd3da94ebfcfe74a7568cce509f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 21:01:47 -0300 Subject: [PATCH 088/770] shader: Better but still partial interpolation support --- .../maxwell/translate/impl/load_store_attribute.cpp | 12 +++++++----- 1 file changed, 7 
insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 2922145ee..516ffec2d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -141,14 +141,16 @@ void TranslatorVisitor::IPA(u64 insn) { const IR::Attribute attribute{ipa.attribute}; IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { - // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; - const bool is_perspective{false}; + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; if (is_perspective) { - const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; - value = ir.FPMul(value, rcp_position_w); + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); } } - switch (ipa.interpolation_mode) { case InterpolationMode::Pass: break; From 2be5c7eff40647344da7951dc5e20a7deebf78aa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 21:56:27 -0300 Subject: [PATCH 089/770] shader: Fold interpolation multiplications --- .../ir_opt/constant_propagation_pass.cpp | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ef7766d22..3dab424f6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -236,6 +236,38 @@ void FoldSelect(IR::Inst& inst) { } } +void FoldFPMul32(IR::Inst& inst) { + const auto control{inst.Flags()}; + if (control.no_contraction) { + return; + } + // Fold interpolation operations + const IR::Value lhs_value{inst.Arg(0)}; + const IR::Value rhs_value{inst.Arg(1)}; + if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { + return; + } + IR::Inst* const lhs_op{lhs_value.InstRecursive()}; + IR::Inst* const rhs_op{rhs_value.InstRecursive()}; + if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + return; + } + const IR::Value recip_source{rhs_op->Arg(0)}; + const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; + if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { + return; + } + IR::Inst* const attr_a{recip_source.InstRecursive()}; + IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; + if (attr_a->Opcode() != IR::Opcode::GetAttribute || + attr_b->Opcode() != IR::Opcode::GetAttribute) { + return; + } + if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { + inst.ReplaceUsesWith(lhs_op->Arg(0)); + } +} + void FoldLogicalAnd(IR::Inst& inst) { if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; @@ -348,6 +380,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::FPMul32: + return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); case IR::Opcode::LogicalOr: From c63cf4fa2e22538a01c191e1f97ac0f93b67e804 Mon Sep 17 
00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: [PATCH 090/770] vk_pipeline_cache: Add pipeline cache --- src/shader_recompiler/environment.h | 11 +- src/shader_recompiler/file_environment.cpp | 4 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/stage.h | 4 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 413 +++++++++++++----- .../renderer_vulkan/vk_pipeline_cache.h | 34 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 1 + .../renderer_vulkan/vk_render_pass_cache.h | 4 +- 8 files changed, 358 insertions(+), 117 deletions(-) diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1fcaa56dd..6dec4b255 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,8 +3,8 @@ #include #include "common/common_types.h" -#include "shader_recompiler/stage.h" #include "shader_recompiler/program_header.h" +#include "shader_recompiler/stage.h" namespace Shader { @@ -14,9 +14,9 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; - [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; - [[nodiscard]] virtual std::array WorkgroupSize() = 0; + [[nodiscard]] virtual std::array WorkgroupSize() const = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { return sph; @@ -26,9 +26,14 @@ public: return stage; } + [[nodiscard]] u32 StartAddress() const noexcept { + return start_address; + } + protected: ProgramHeader sph{}; Stage stage{}; + u32 start_address{}; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 21700c72b..f2104f444 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,11 +39,11 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } -u32 FileEnvironment::TextureBoundBuffer() { +u32 FileEnvironment::TextureBoundBuffer() const { throw NotImplementedException("Texture bound buffer serialization"); } -std::array FileEnvironment::WorkgroupSize() { +std::array FileEnvironment::WorkgroupSize() const { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 62302bc8e..17640a622 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -14,9 +14,9 @@ public: u64 ReadInstruction(u32 offset) override; - u32 TextureBoundBuffer() override; + u32 TextureBoundBuffer() const override; - std::array WorkgroupSize() override; + std::array WorkgroupSize() const override; private: std::vector data; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index fc6ce6043..7d4f2c0bb 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -4,9 +4,11 @@ #pragma once +#include "common/common_types.h" + namespace Shader { -enum class Stage { +enum class Stage : u32 { Compute, VertexA, VertexB, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 75f7c1e61..41fc9588f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,12 +4,15 @@ #include #include +#include #include #include #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/file_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include 
"core/core.h" #include "core/memory.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -37,18 +40,23 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; + } ~GenericEnvironment() override = default; - std::optional Analyze(u32 start_address) { + std::optional Analyze() { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -66,11 +74,15 @@ public: return read_highest - read_lowest + INST_SIZE; } + [[nodiscard]] bool CanBeSerialized() const noexcept { + return has_unbound_instructions; + } + [[nodiscard]] u128 CalculateHash() const { const size_t size{ReadSize()}; - auto data = std::make_unique(size); + const auto data{std::make_unique(size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(reinterpret_cast(data.get()), size); + return Common::CityHash128(data.get(), size); } u64 ReadInstruction(u32 address) final { @@ -80,9 +92,32 @@ public: if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } + has_unbound_instructions = true; return gpu_memory->Read(program_base + address); } + void Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(ReadSize())}; + const auto data{std::make_unique(code_size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + + const u32 texture_bound{TextureBoundBuffer()}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(data.get(), code_size); + if (stage == Shader::Stage::Compute) { + const std::array workgroup_size{WorkgroupSize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } + } + protected: static constexpr size_t INST_SIZE = sizeof(u64); @@ -122,16 +157,22 @@ protected: u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; + + bool has_unbound_instructions = false; }; +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_offset) - : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + GPUVAddr program_base_, u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, 
maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -158,11 +199,11 @@ public: ~GraphicsEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -174,18 +215,20 @@ class ComputeEnvironment final : public GenericEnvironment { public: explicit ComputeEnvironment() = default; explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { stage = Shader::Stage::Compute; } ~ComputeEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -193,8 +236,174 @@ public: private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; + +void SerializePipeline(std::span key, std::span envs, + std::ofstream& file) { + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); +} + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { + try { + std::ofstream file; + file.exceptions(std::ifstream::failbit); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); + return; + } + if (file.tellp() == 0) { + // Write header... 
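+            // A write position of zero identifies a freshly created file, so
+            // the header only needs to be written on first use.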
+ } + const std::span key_span(reinterpret_cast(&key), sizeof(key)); + SerializePipeline(key_span, MakeSpan(envs), file); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::Delete(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); + } + } +} + +class FileEnvironment final : public Shader::Environment { +public: + void Deserialize(std::ifstream& file) { + u64 code_size{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } + } + + u64 ReadInstruction(u32 address) override { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; + } + + u32 TextureBoundBuffer() const override { + return texture_bound; + } + + std::array WorkgroupSize() const override { + return workgroup_size; + } + +private: + std::unique_ptr code; + std::array workgroup_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; } // Anonymous namespace +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; + std::string base_dir{shader_dir + "/vulkan"}; + std::string transferable_dir{base_dir + "/transferable"}; + std::string precompiled_dir{base_dir + "/precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); + + Common::ThreadWorker worker(11, "PipelineBuilder"); + std::mutex cache_mutex; + struct { + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + std::ifstream file; + Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + // Read header... 
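+    // Entries follow back to back: each holds a count of environments, the
+    // serialized environments themselves, and finally the pipeline cache key.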
+ + while (file.tellg() != end) { + if (stop_loading) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + auto envs{std::make_shared>(num_envs)}; + for (FileEnvironment& env : *envs) { + env.Deserialize(file); + } + if (envs->front().ShaderStage() == Shader::Stage::Compute) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + + std::lock_guard lock{cache_mutex}; + compute_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } else { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : *envs) { + env_ptrs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + + std::lock_guard lock{cache_mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } + ++state.total; + } + { + std::lock_guard lock{cache_mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + } + worker.WaitForRequests(); +} + size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -279,17 +488,22 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!cpu_shader_addr) { return nullptr; } - ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; if (!shader) { - return CreateComputePipelineWithoutShader(*cpu_shader_addr); + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + shader = MakeShaderInfo(env, *cpu_shader_addr); } - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { return &pipeline; } - pipeline = CreateComputePipeline(shader); + pipeline = CreateComputePipeline(key, shader); return &pipeline; } @@ -310,26 +524,25 @@ bool PipelineCache::RefreshStages() { } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; if (!shader_info) { - const u32 offset{shader_config.offset}; - shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); } graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; } -const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr) { - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; +const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { auto info = std::make_unique(); - if 
(const std::optional cached_hash{env.Analyze(start_address)}) { + if (const std::optional cached_hash{env.Analyze()}) { info->unique_hash = *cached_hash; info->size_bytes = env.CachedSize(); } else { // Slow path, not really hit on commercial games // Build a control flow graph to get the real shader size - flow_block_pool.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + main_pools.flow_block.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; info->unique_hash = env.CalculateHash(); info->size_bytes = env.ReadSize(); } @@ -339,13 +552,55 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - - std::array envs; +GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, + const GraphicsPipelineCacheKey& key, + std::span envs) { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == u128{}) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + std::array modules; + + u32 binding{0}; + env_index = 0; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const std::vector code{EmitSPIRV(profile, env, program, binding)}; + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, + std::move(modules), infos); +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + main_pools.ReleaseContents(); + + std::array graphics_envs; + boost::container::static_vector generic_envs; + boost::container::static_vector envs; const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -353,86 +608,44 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { continue; } const auto program{static_cast(index)}; - GraphicsEnvironment& env{envs[index]}; + GraphicsEnvironment& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - - const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); - Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); - programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + generic_envs.push_back(&env); + envs.push_back(&env); } - std::array infos{}; - std::array modules; - - u32 binding{0}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { - continue; - } - 
UNIMPLEMENTED_IF(index == 0); - - GraphicsEnvironment& env{envs[index]}; - Shader::IR::Program& program{programs[index]}; - - const size_t stage_index{index - 1}; - infos[stage_index] = &program.info; - std::vector code{EmitSPIRV(profile, env, program, binding)}; - - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); - - modules[stage_index] = BuildShader(device, code); + GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, - graphics_key.state, std::move(modules), infos); + return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { +ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; - if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { - // TODO: Load from cache + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + main_pools.ReleaseContents(); + ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); } - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); + return pipeline; +} - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; - Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; +ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env) const { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(profile, env, program, binding)}; - /* - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-dis D:\\shader.spv"); - */ - shader_info->unique_hash = env.CalculateHash(); - shader_info->size_bytes = env.ReadSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { - ShaderInfo shader; - ComputePipeline pipeline{CreateComputePipeline(&shader)}; - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - const size_t size_bytes{shader.size_bytes}; - Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); - return &compute_cache.emplace(key, std::move(pipeline)).first->second; -} - -ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { - const auto& qmd{kepler_compute.launch_description}; - return { - .unique_hash = unique_hash, - .shared_memory_size = qmd.shared_alloc, - 
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, - }; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 60fb976df..2ecb68bdc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ namespace Vulkan { class ComputePipeline; class Device; +class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKDescriptorPool; @@ -107,6 +109,18 @@ struct ShaderInfo { size_t size_bytes{}; }; +struct ShaderPools { + void ReleaseContents() { + inst.ReleaseContents(); + block.ReleaseContents(); + flow_block.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, @@ -123,19 +137,24 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + private: bool RefreshStages(); - const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr); + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); GraphicsPipeline CreateGraphicsPipeline(); - ComputePipeline CreateComputePipeline(ShaderInfo* shader); + GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs); - ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, + Shader::Environment& env) const; Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -155,11 +174,10 @@ private: std::unordered_map compute_cache; std::unordered_map graphics_cache; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; - Shader::ObjectPool flow_block_pool; + ShaderPools main_pools; Shader::Profile profile; + std::string pipeline_cache_filename; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 7e5ae43ea..1c6ba7289 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -50,6 +50,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; const auto [pair, is_new] = cache.try_emplace(key); if (!is_new) { return *pair->second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index db8e83f1a..eaa0ed775 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "video_core/surface.h" @@ -37,7 +38,7 @@ struct hash { namespace Vulkan { - class Device; +class 
Device; class RenderPassCache { public: @@ -48,6 +49,7 @@ public: private: const Device* device{}; std::unordered_map cache; + std::mutex mutex; }; } // namespace Vulkan From f8115a6a9e544c3cc33f32ea821d0df15e01591c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: [PATCH 091/770] vk_pipeline_cache: Add pipeline cache --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 036b531b9..8f63a7591 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -514,6 +514,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 88dbd753b..2f1551e65 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -101,6 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; From a8d8fd40f7d7b249c542e4694953e2da5998fbaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 14:39:59 -0300 Subject: [PATCH 092/770] shader: Fix TEX mask --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b691b4d1f..d2626f3e7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -188,6 +188,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } }()}; + IR::Reg dest_reg{tex.dest_reg}; for (int element = 0; element < 4; ++element) { if (((tex.mask >> element) & 1) == 0) { continue; @@ -198,7 +199,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } else { value = IR::F32{v.ir.CompositeExtract(sample, element)}; } - v.F(tex.dest_reg + element, value); + v.F(dest_reg, value); + ++dest_reg; } if (tex.sparse_pred != IR::Pred::PT) { v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); From d40faa1db0f1703edc4e8f279f1556cee4ebddad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 16:12:04 -0300 Subject: [PATCH 093/770] vk_pipeline_cache: Fix ReleaseContents order --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 2ecb68bdc..d481f56f9 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -111,9 +111,9 @@ struct ShaderInfo { struct ShaderPools { void ReleaseContents() { - inst.ReleaseContents(); - block.ReleaseContents(); flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } Shader::ObjectPool inst; From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: [PATCH 094/770] shader: Implement VOTE --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 ++ .../backend/spirv/emit_context.h | 3 +- .../backend/spirv/emit_spirv.cpp | 9 +++ .../backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 +++++++++++++++++++ .../frontend/ir/ir_emitter.cpp | 16 +++++ .../frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 6 ++ .../translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/vote.cpp | 52 +++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 6 ++ src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 7 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + .../vulkan_common/vulkan_device.cpp | 1 + src/video_core/vulkan_common/vulkan_device.h | 6 ++ 18 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 23cb523a8..086bdf8d0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_vote.cpp environment.h exception.h file_environment.cpp @@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4a4de3676..36f130781 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } + if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? 
spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9b9e0d6b1..6e64360bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id subgroup_local_invocation_id{}; Id instance_id{}; Id instance_index{}; Id base_instance{}; @@ -96,7 +97,7 @@ public: std::array output_generics{}; std::array frag_color{}; - Id frag_depth {}; + Id frag_depth{}; std::vector interfaces; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 93e851133..107403912 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } + if (info.uses_subgroup_vote && profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 960d022ff..ce23200f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp new file mode 100644 index 000000000..a63677ef2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
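// The emitters below implement guest warp votes in two tiers: the native
// SPV_KHR_subgroup_vote instructions when the host warp cannot exceed the
// guest's 32 threads, and a ballot-based emulation otherwise. A sketch of the
// emulated tier in GLSL-style pseudocode (illustrative only; these names are
// not part of the patch):
//
//   uvec4 active = subgroupBallot(true);            // live invocations
//   uvec4 votes  = subgroupBallot(pred);            // invocations voting true
//   uint a = active[gl_SubgroupInvocationID >> 5u]; // this warp's 32-bit word
//   uint v = votes[gl_SubgroupInvocationID >> 5u];
//   voteAll   = (v & a) == a;
//   voteAny   = (v & a) != 0u;
//   voteEqual = ((v ^ a) == 0u) || ((v ^ a) == a);
//
// Masking against the active lanes keeps helper and inactive invocations from
// flipping the result of an ALL vote.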
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 432dd29a5..ff2970125 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +U1 IREmitter::VoteAll(const U1& value) { + return Inst(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst(Opcode::SubgroupBallot, value); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 346cef3ab..1708be3ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -234,6 +234,11 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + 
[[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index bdc07b9a7..fe888b8b2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32, OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +// Vote operations +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
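// What a single guest VOTE instruction produces, assuming the usual NVIDIA
// semantics (rough pseudocode; the field names match the BitField decode
// below):
//
//   src         = P[pred_a] ^ (neg_pred_a != 0)   // source predicate
//   P[pred_b]   = vote_op(src)                    // ALL / ANY / EQ across the warp
//   R[dest_reg] = ballot(src)                     // one bit per active lane
//
// Either half can be discarded by targeting PT or RZ, so the translation
// below always emits both the predicate vote and the ballot.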
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f44eac5d8..db5138e4d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; default: break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3181c79fb..b57cbc310 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -19,6 +19,8 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_vote{}; + bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f97730b34..3d9f04d1a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -80,6 +80,7 @@ struct Info { bool uses_sampled_1d{}; bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = 
device.GetLogical().CreateComputePipeline({
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
         .stage{
             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-            .pNext = nullptr,
+            .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
             .flags = 0,
             .stage = VK_SHADER_STAGE_COMPUTE_BIT,
             .module = *spv_module,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 41fc9588f..bdbc8dd1e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
             float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
         .support_fp64_signed_zero_nan_preserve =
             float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
+        .support_vote = true,
+        .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
     };
 }
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f0e5b098c..009b74f12 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -737,6 +737,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
             subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
             extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
             guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
+            ext_subgroup_size_control = true;
         }
     } else {
         is_warp_potentially_bigger = true;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 82bccc8f0..c268a4f8d 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -193,6 +193,11 @@ public:
         return ext_shader_viewport_index_layer;
     }
 
+    /// Returns true if the device supports VK_EXT_subgroup_size_control.
+    bool IsExtSubgroupSizeControlSupported() const {
+        return ext_subgroup_size_control;
+    }
+
     /// Returns true if the device supports VK_EXT_transform_feedback.
     bool IsExtTransformFeedbackSupported() const {
         return ext_transform_feedback;
@@ -297,6 +302,7 @@ private:
     bool ext_depth_range_unrestricted{};    ///< Support for VK_EXT_depth_range_unrestricted.
     bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
     bool ext_tooling_info{};                ///< Support for VK_EXT_tooling_info.
+    bool ext_subgroup_size_control{};       ///< Support for VK_EXT_subgroup_size_control.
     bool ext_transform_feedback{};          ///< Support for VK_EXT_transform_feedback.
     bool ext_custom_border_color{};         ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};      ///< Support for VK_EXT_extended_dynamic_state.
From 1d2db783986717c912709ccea4a18d9b8f396ee1 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Tue, 23 Mar 2021 22:07:14 -0300
Subject: [PATCH 095/770] shader: Fix use-after-free bug in object_pool

---
 src/shader_recompiler/object_pool.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index c10751b9d..424281634 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -18,7 +18,7 @@ public:
     }
 
     template <typename... Args>
-    requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
+    requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) {
         return std::construct_at(Memory(), std::forward<Args>(args)...);
     }
 
@@ -32,12 +32,12 @@ public:
             const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
             chunks.clear();
             chunks.emplace_back(total_objects);
-            chunks.shrink_to_fit();
         } else {
             root.Release();
             chunks.resize(1);
-            chunks.shrink_to_fit();
         }
+        chunks.shrink_to_fit();
+        node = &chunks.front();
     }
 
 private:
From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Wed, 24 Mar 2021 01:33:45 -0300
Subject: [PATCH 096/770] shader: Implement NDC [-1, 1], attribute types and
 default varying initialization

---
 src/shader_recompiler/CMakeLists.txt          |  1 +
 .../backend/spirv/emit_context.cpp            | 35 +++++++---
 .../backend/spirv/emit_context.h              | 10 ++-
 .../backend/spirv/emit_spirv.h                |  2 +
 .../spirv/emit_spirv_context_get_set.cpp      | 69 ++++++++++++-------
 .../backend/spirv/emit_spirv_special.cpp      | 35 ++++++++++
 .../frontend/ir/ir_emitter.cpp                |  8 +++
 .../frontend/ir/ir_emitter.h                  |  3 +
 .../frontend/ir/microinstruction.cpp          |  2 +
 src/shader_recompiler/frontend/ir/opcodes.inc |  4 ++
 .../maxwell/structured_control_flow.cpp       |  7 +-
 src/shader_recompiler/profile.h               | 13 ++++
 .../renderer_vulkan/vk_graphics_pipeline.cpp  |  3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 33 ++++++++-
 .../renderer_vulkan/vk_pipeline_cache.h       |  4 +-
 15 files changed, 186 insertions(+), 43 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_special.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 086bdf8d0..028e8b2d2 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
     backend/spirv/emit_spirv_select.cpp
+    backend/spirv/emit_spirv_special.cpp
    backend/spirv/emit_spirv_undefined.cpp
    backend/spirv/emit_spirv_vote.cpp
    environment.h
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 36f130781..ea46af244 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -67,6 +67,18 @@ Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin =
 Id DefineOutput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) {
     return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
 }
+
+Id GetAttributeType(EmitContext& ctx, AttributeType type) {
+    switch (type) {
+    case AttributeType::Float:
+        return ctx.F32[4];
+    case AttributeType::SignedInt:
+        return ctx.TypeVector(ctx.TypeInt(32, true), 4);
+    case AttributeType::UnsignedInt:
+        return ctx.U32[4];
+    }
+    throw InvalidArgument("Invalid attribute type {}", type);
+}
 } // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -82,11 +94,11 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
 }
 
 EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
-    : Sirit::Module(0x00010000), profile{profile_} {
+    : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
-    DefineInterfaces(program.info, program.stage);
+    DefineInterfaces(program.info);
     DefineConstantBuffers(program.info, 
binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); @@ -130,6 +142,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { U32.Define(*this, TypeInt(32, false), "u32"); input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); if (info.uses_int8) { @@ -162,9 +177,9 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineInterfaces(const Info& info, Stage stage) { - DefineInputs(info, stage); - DefineOutputs(info, stage); +void EmitContext::DefineInterfaces(const Info& info) { + DefineInputs(info); + DefineOutputs(info); } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { @@ -252,7 +267,7 @@ void EmitContext::DefineLabels(IR::Program& program) { } } -void EmitContext::DefineInputs(const Info& info, Stage stage) { +void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); } @@ -288,8 +303,8 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (!info.loads_generics[index]) { continue; } - // FIXME: Declare size from input - const Id id{DefineInput(*this, F32[4])}; + const Id type{GetAttributeType(*this, profile.generic_input_types[index])}; + const Id id{DefineInput(*this, type)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; @@ -323,8 +338,8 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: } } -void EmitContext::DefineOutputs(const Info& info, Stage stage) { - if (info.stores_position) { +void EmitContext::DefineOutputs(const Info& info) { + if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); } for (size_t i = 0; i < info.stores_generics.size(); ++i) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 6e64360bf..5ed815c06 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -52,6 +52,7 @@ public: [[nodiscard]] Id Def(const IR::Value& value); const Profile& profile; + Stage stage{}; Id void_id{}; Id U1{}; @@ -72,6 +73,9 @@ public: UniformDefinitions uniform_types; Id input_f32{}; + Id input_u32{}; + Id input_s32{}; + Id output_f32{}; Id storage_u32{}; @@ -104,7 +108,7 @@ public: private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineInterfaces(const Info& info, Stage stage); + void DefineInterfaces(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); @@ -113,8 +117,8 @@ private: void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); - void DefineInputs(const Info& info, Stage stage); - void DefineOutputs(const Info& info, Stage stage); + void DefineInputs(const Info& info); + void DefineOutputs(const Info& info); }; } // namespace Shader::Backend::SPIRV diff --git 
a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index ce23200f2..7fefcf2f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -28,6 +28,8 @@ void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 052b84151..8fc040f8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,30 +2,26 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/backend/spirv/emit_spirv.h" +#pragma optimize("", off) + namespace Shader::Backend::SPIRV { namespace { -Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { - const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; - if (IR::IsGeneric(attr)) { - const u32 index{IR::GenericAttributeIndex(attr)}; - return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); - } - switch (attr) { - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); - case IR::Attribute::InstanceId: - return ctx.OpLoad(ctx.U32[1], ctx.instance_id); - case IR::Attribute::VertexId: - return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); - default: - throw NotImplementedException("Read attribute {}", attr); +std::tuple AttrTypes(EmitContext& ctx, u32 index) { + const bool is_first_reader{ctx.stage == Stage::VertexB}; + const AttributeType type{ctx.profile.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return {ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return {ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return {ctx.input_s32, ctx.TypeInt(32, true), true}; } + throw InvalidArgument("Invalid attribute type {}", type); } Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { @@ -129,19 +125,40 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { - if (!ctx.profile.support_vertex_instance_id) { - switch (attr) { - case IR::Attribute::InstanceId: + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const auto [pointer_type, type, needs_cast]{AttrTypes(ctx, index)}; + const Id generic_id{ctx.input_generics.at(index)}; + const Id pointer{ctx.OpAccessChain(pointer_type, generic_id, element_id())}; + const Id value{ctx.OpLoad(type, pointer)}; + return needs_cast ? 
ctx.OpBitcast(ctx.F32[1], value) : value; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + } else { return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index), ctx.OpLoad(ctx.U32[1], ctx.base_instance)); - case IR::Attribute::VertexId: + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); + } else { return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); - default: - break; } + default: + throw NotImplementedException("Read attribute {}", attr); } - return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..70ae7b51e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB) { + const Id zero{ctx.Constant(ctx.F32[1], 0.0f)}; + const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; + const Id null_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, zero)}; + ctx.OpStore(ctx.output_position, ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)); + for (const Id generic_id : ctx.output_generics) { + if (Sirit::ValidId(generic_id)) { + ctx.OpStore(generic_id, null_vector); + } + } + } +} + +void EmitEpilogue(EmitContext& ctx) { + if (ctx.profile.convert_depth_mode) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ff2970125..ce610799a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -92,6 +92,14 @@ void IREmitter::DemoteToHelperInvocation(Block* continue_label) { Inst(Opcode::DemoteToHelperInvocation, continue_label); } +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1708be3ef..39109b0de 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -39,6 +39,9 @@ public: void Return(); void 
DemoteToHelperInvocation(Block* continue_label); + void Prologue(); + void Epilogue(); + [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 21b7d8a9f..ba3968056 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,6 +56,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SelectionMerge: case Opcode::Return: case Opcode::DemoteToHelperInvocation: + case Opcode::Prologue: + case Opcode::Epilogue: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::SetFragColor: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fe888b8b2..8945c7b04 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -15,6 +15,10 @@ OPCODE(SelectionMerge, Void, Labe OPCODE(Return, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) + // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cec03e73e..fdac1c95a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -634,6 +634,9 @@ public: : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); + + IR::IREmitter ir{*block_list.front()}; + ir.Prologue(); } private: @@ -734,7 +737,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.Return(); + IR::IREmitter ir{*current_block}; + ir.Epilogue(); + ir.Return(); current_block = nullptr; break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index b57cbc310..41550bfc6 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -4,8 +4,18 @@ #pragma once +#include + +#include "common/common_types.h" + namespace Shader { +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, +}; + struct Profile { bool unified_descriptor_binding{}; bool support_vertex_instance_id{}; @@ -24,6 +34,9 @@ struct Profile { // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + + std::array generic_input_types{}; + bool convert_depth_mode{}; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a2ec418b1..a87ed1976 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -181,6 +181,9 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 
b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bdbc8dd1e..504b8c9d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -437,7 +437,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - profile = Shader::Profile{ + base_profile = Shader::Profile{ .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -458,6 +458,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .generic_input_types{}, }; } @@ -589,6 +590,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, Shader::Environment& env{*envs[env_index]}; ++env_index; + const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } @@ -645,9 +647,36 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, env, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, + Shader::Stage stage) { + Shader::Profile profile{base_profile}; + if (stage == Shader::Stage::VertexB) { + profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), + &CastAttributeType); + } + return profile; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index d481f56f9..e09d78063 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -156,6 +156,8 @@ private: ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env) const; + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -176,7 +178,7 @@ private: ShaderPools 
main_pools; - Shader::Profile profile; + Shader::Profile base_profile; std::string pipeline_cache_filename; }; From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 00:02:30 +0100 Subject: [PATCH 097/770] shader: Fix F2I --- .../backend/spirv/emit_spirv.h | 3 + .../spirv/emit_spirv_floating_point.cpp | 20 ++++- .../frontend/ir/ir_emitter.cpp | 18 ++++ .../frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../floating_point_conversion_integer.cpp | 88 +++++++++++++++++-- .../frontend/maxwell/translate/impl/impl.cpp | 17 ++++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../ir_opt/collect_shader_info_pass.cpp | 2 + .../ir_opt/lower_fp16_to_fp32.cpp | 2 + 10 files changed, 147 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7fefcf2f2..6d4adafc7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -189,6 +189,9 @@ Id EmitFPSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); Id EmitFPRoundEven16(EmitContext& ctx, Id value); Id EmitFPRoundEven32(EmitContext& ctx, Id value); Id EmitFPRoundEven64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 1fdf66cb6..24300af39 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -15,7 +15,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { return op; } -Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { +Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { if (ctx.profile.has_broken_spirv_clamp) { return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); } else { @@ -139,19 +139,31 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) { Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; - return Saturate(ctx, ctx.F16[1], value, zero, one); + return Clamp(ctx, ctx.F16[1], value, zero, one); } Id EmitFPSaturate32(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; - return Saturate(ctx, ctx.F32[1], value, zero, one); + return Clamp(ctx, ctx.F32[1], value, zero, one); } Id EmitFPSaturate64(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; - return Saturate(ctx, ctx.F64[1], value, zero, one); + return Clamp(ctx, ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F64[1], value, 
min_value, max_value); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce610799a..6280c08f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } +F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::FPClamp16, value, min_value, max_value); + case Type::F32: + return Inst(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 39109b0de..ebbda78a9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -147,6 +147,7 @@ public: [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8945c7b04..dd17212a1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32, OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp16, F16, F16, F16, F16, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) OPCODE(FPRoundEven16, F16, F16, ) OPCODE(FPRoundEven32, F32, F32, ) OPCODE(FPRoundEven64, F64, F64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 81175627f..7c5a72800 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
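// Background for the fix below, as a sketch (illustrative; only ClampBounds
// exists in the patch): guest F2I saturates out-of-range inputs to the
// destination type's bounds and maps NaN to fixed values, while SPIR-V's
// OpConvertFToS/OpConvertFToU are undefined there. Clamping before the
// conversion and overriding NaN afterwards models that behaviour, e.g. for a
// signed 32-bit result from an f32 source:
//
//   const auto [max_bound, min_bound] = ClampBounds(DestFormat::I32, true);
//   const f32 clamped = std::clamp(value, static_cast<f32>(min_bound),
//                                  static_cast<f32>(max_bound));
//   const s32 result = std::isnan(value) ? 0 : static_cast<s32>(clamped);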
+#include <limits>
+
 #include "common/common_types.h"
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/maxwell/opcodes.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
 
 namespace Shader::Maxwell {
 namespace {
@@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) {
     }
 }
 
+std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
+    if (is_signed) {
+        switch (format) {
+        case DestFormat::I16:
+            return {static_cast<f64>(std::numeric_limits<s16>::max()),
+                    static_cast<f64>(std::numeric_limits<s16>::min())};
+        case DestFormat::I32:
+            return {static_cast<f64>(std::numeric_limits<s32>::max()),
+                    static_cast<f64>(std::numeric_limits<s32>::min())};
+        case DestFormat::I64:
+            return {static_cast<f64>(std::numeric_limits<s64>::max()),
+                    static_cast<f64>(std::numeric_limits<s64>::min())};
+        default: {}
+        }
+    } else {
+        switch (format) {
+        case DestFormat::I16:
+            return {static_cast<f64>(std::numeric_limits<u16>::max()),
+                    static_cast<f64>(std::numeric_limits<u16>::min())};
+        case DestFormat::I32:
+            return {static_cast<f64>(std::numeric_limits<u32>::max()),
+                    static_cast<f64>(std::numeric_limits<u32>::min())};
+        case DestFormat::I64:
+            return {static_cast<f64>(std::numeric_limits<u64>::max()),
+                    static_cast<f64>(std::numeric_limits<u64>::min())};
+        default: {}
+        }
+    }
+    throw NotImplementedException("Invalid destination format {}", format);
+}
+
 IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
     union {
         u64 raw;
@@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
     // For example converting F32 65537.0 to U16, the expected value is 0xffff,
 
     const bool is_signed{f2i.is_signed != 0};
-    const size_t bitsize{BitSize(f2i.dest_format)};
-    const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)};
+    const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
+
+    IR::F16F32F64 intermediate;
+    switch (f2i.src_format) {
+    case SrcFormat::F16: {
+        const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
+        const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    case SrcFormat::F32: {
+        const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
+        const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    case SrcFormat::F64: {
+        const IR::F64 max_val{v.ir.Imm64(max_bound)};
+        const IR::F64 min_val{v.ir.Imm64(min_bound)};
+        intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
+        break;
+    }
+    default:
+        throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
+    }
+
+    const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
+    IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
+
+    bool handled_special_case = false;
+    const bool special_nan_cases =
+        (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
+    if (special_nan_cases) {
+        if (f2i.dest_format == DestFormat::I32) {
+            handled_special_case = true;
+            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
+        } else if (f2i.dest_format == DestFormat::I64) {
+            handled_special_case = true;
+            result = IR::U64{
+                v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
+        }
+    }
+    if (!handled_special_case && is_signed) {
+        if (bitsize != 64) {
+            result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
+        } else {
+            result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
+        }
+    }
 
     if (bitsize == 64) {
-        const IR::Value vector{v.ir.UnpackUint2x32(result)};
-        v.X(f2i.dest_reg + 0, 
IR::U32{v.ir.CompositeExtract(vector, 0)}); - v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + v.L(f2i.dest_reg, result); } else { v.X(f2i.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 758a0230a..9bae89c10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } @@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c994fe803..54c31deb4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -341,10 +341,12 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); void F(IR::Reg dest_reg, const IR::F32& value); void D(IR::Reg dest_reg, const IR::F64& value); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index db5138e4d..32f276f3b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPNeg16: case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: + case IR::Opcode::FPClamp16: case IR::Opcode::FPTrunc16: case IR::Opcode::FPOrdEqual16: case IR::Opcode::FPUnordEqual16: @@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRecipSqrt64: case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: + case IR::Opcode::FPClamp64: case IR::Opcode::FPTrunc64: case IR::Opcode::FPOrdEqual64: case IR::Opcode::FPUnordEqual64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0e8862f45..0d2c91ed6 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPRoundEven32; case IR::Opcode::FPSaturate16: return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp16: + return IR::Opcode::FPClamp32; case IR::Opcode::FPTrunc16: return IR::Opcode::FPTrunc32; 
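        // Each 16-bit float opcode needs a 32-bit twin in this table: the
        // lowering pass rewrites instructions in place, roughly
        //
        //   for (IR::Inst& inst : block) {
        //       inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        //   }
        //
        // so the new FPClamp16 must map to FPClamp32 or it would survive
        // lowering on hosts without fp16 support.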
case IR::Opcode::CompositeConstructF16x2: From 83a283fa867d0a09742faff11d9115acc95ea556 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:36:59 -0300 Subject: [PATCH 098/770] shader: Minor style nits --- .../translate/impl/floating_point_conversion_integer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 7c5a72800..ef55b9c75 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -69,7 +69,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } else { switch (format) { @@ -82,7 +83,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } throw NotImplementedException("Invalid destination format {}", format); From d3dad6b6320f680b4e85ab991941d15cbce0e616 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 19:37:44 -0300 Subject: [PATCH 099/770] shader: Properly insert Prologue instruction --- .../frontend/maxwell/structured_control_flow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index fdac1c95a..79e344986 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -635,7 +635,8 @@ public: block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); - IR::IREmitter ir{*block_list.front()}; + IR::Block& first_block{*block_list.front()}; + IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } From 49e87ea8ab86f94239a6830666f3a8f897a0167a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 19:38:37 -0300 Subject: [PATCH 100/770] shader: Track first bindless argument instead of the instruction itself --- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index de9d633e2..2c8164b8a 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -121,7 +121,7 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { VisitedBlocks visited; - const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + const std::optional track_addr{Track(block, inst.Arg(0), visited)}; if (!track_addr) { throw NotImplementedException("Failed to track bindless texture constant buffer"); } From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:37 -0400 Subject: [PATCH 101/770] shader: Implement SHFL --- src/shader_recompiler/CMakeLists.txt | 3 +- .../backend/spirv/emit_context.cpp | 3 +- .../backend/spirv/emit_spirv.cpp | 6 +- 
.../backend/spirv/emit_spirv.h | 9 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 -------- .../backend/spirv/emit_spirv_warp.cpp | 135 ++++++++++++++++++ .../frontend/ir/ir_emitter.cpp | 23 +++ .../frontend/ir/ir_emitter.h | 12 +- .../frontend/ir/microinstruction.cpp | 12 ++ .../frontend/ir/microinstruction.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 7 +- .../translate/impl/integer_scaled_add.cpp | 4 +- .../translate/impl/not_implemented.cpp | 4 - .../maxwell/translate/impl/warp_shuffle.cpp | 69 +++++++++ .../ir_opt/collect_shader_info_pass.cpp | 6 + src/shader_recompiler/shader_info.h | 1 + 16 files changed, 284 insertions(+), 69 deletions(-) delete mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 028e8b2d2..4161783c8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -16,7 +16,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_select.cpp backend/spirv/emit_spirv_special.cpp backend/spirv/emit_spirv_undefined.cpp - backend/spirv/emit_spirv_vote.cpp + backend/spirv/emit_spirv_warp.cpp environment.h exception.h file_environment.cpp @@ -125,6 +125,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/vote.cpp + frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea46af244..5db4a9082 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } - if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + if (info.uses_subgroup_invocation_id || + (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { subgroup_local_invocation_id = DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 107403912..cee72f50d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if (info.uses_subgroup_vote && profile.support_vote) { + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { ctx.AddExtension("SPV_KHR_shader_ballot"); ctx.AddCapability(spv::Capability::SubgroupBallotKHR); if (!profile.warp_size_potentially_larger_than_guest) { @@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitGetInBoundsFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace 
Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6d4adafc7..a233a4817 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); Id EmitFPAbs16(EmitContext& ctx, Id value); Id EmitFPAbs32(EmitContext& ctx, Id value); Id EmitFPAbs64(EmitContext& ctx, Id value); @@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp deleted file mode 100644 index a63677ef2..000000000 --- a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/backend/spirv/emit_spirv.h" - -namespace Shader::Backend::SPIRV { -namespace { -Id LargeWarpBallot(EmitContext& ctx, Id ballot) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; - const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; - return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); -} -} // Anonymous namespace - -Id EmitVoteAll(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAllKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; - return ctx.OpIEqual(ctx.U1, lhs, active_mask); -} - -Id EmitVoteAny(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAnyKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; - return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); -} - -Id EmitVoteEqual(EmitContext& ctx, Id pred) { - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); - } - const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; - const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; - const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; - const Id 
lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; - return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), - ctx.OpIEqual(ctx.U1, lhs, active_mask)); -} - -Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { - const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; - if (!ctx.profile.warp_size_potentially_larger_than_guest) { - return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); - } - return LargeWarpBallot(ctx, ballot); -} - -} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..44d8a347f --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -0,0 +1,135 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} + +void SetInBoundsFlag(IR::Inst* inst, Id result) { + IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + in_bounds->SetDefinition(result); + in_bounds->Invalidate(); +} + +Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { + return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); +} + +Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { + return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, + ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); +} + +Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); +} + +Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { + return ctx.OpSelect(ctx.U32[1], in_range, + ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return 
ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; + const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; + + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; + const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask) { + const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; + const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; + const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; + + SetInBoundsFlag(inst, in_range); + return SelectValue(ctx, in_range, value, src_thread_id); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6280c08f6..418b7f5ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { return Inst(Opcode::GetSparseFromOp, op); } +U1 IREmitter::GetInBoundsFromOp(const Value& op) { + return 
Inst(Opcode::GetInBoundsFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { return Inst(Opcode::SubgroupBallot, value); } +U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleIndex, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleUp, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleDown, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); +} } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ebbda78a9..64738735e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -104,6 +104,7 @@ public: [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); [[nodiscard]] U1 GetSparseFromOp(const Value& op); + [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -147,7 +148,8 @@ public: [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); @@ -242,6 +244,14 @@ public: [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); [[nodiscard]] U32 SubgroupBallot(const U1& value); + [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, + const IR::U32& clamp, const IR::U32& seg_mask); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ba3968056..be8eb4d4c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case 
Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: case Opcode::GetSparseFromOp: + case Opcode::GetInBoundsFromOp: return true; default: return false; @@ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { case Opcode::GetSparseFromOp: CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); return associated_insts->sparse_inst; + case Opcode::GetInBoundsFromOp: + CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); + return associated_insts->in_bounds_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -262,6 +266,10 @@ void Inst::Use(const Value& value) { AllocAssociatedInsts(assoc_inst); SetPseudoInstruction(assoc_inst->sparse_inst, this); break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->in_bounds_inst, this); + break; default: break; } @@ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); + break; default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index d5336c438..770bbd550 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); struct AssociatedInsts { union { + Inst* in_bounds_inst; Inst* sparse_inst; Inst* zero_inst{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dd17212a1..a2479c46a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -159,6 +159,7 @@ OPCODE(GetSignFromOp, U1, Opaq OPCODE(GetCarryFromOp, U1, Opaque, ) OPCODE(GetOverflowFromOp, U1, Opaque, ) OPCODE(GetSparseFromOp, U1, Opaque, ) +OPCODE(GetInBoundsFromOp, U1, Opaque, ) // Floating-point operations OPCODE(FPAbs16, F16, F16, ) @@ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod, F32x4, U32, OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -// Vote operations +// Warp operations OPCODE(VoteAll, U1, U1, ) OPCODE(VoteAny, U1, U1, ) OPCODE(VoteEqual, U1, U1, ) OPCODE(SubgroupBallot, U32, U1, ) +OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 5469e445a..42fd42bb1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { ISCADD(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::ISCADD_cbuf(u64) { - throw NotImplementedException("ISCADD (cbuf)"); +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::ISCADD_imm(u64 insn) { diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c0e36a7e2..3ccd7b925 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHFL(u64) { - ThrowNotImplemented(Opcode::SHFL); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? 
ir.Imm32(static_cast(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 32f276f3b..61cc314c7 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::ShuffleIndex: + case IR::Opcode::ShuffleUp: + case IR::Opcode::ShuffleDown: + case IR::Opcode::ShuffleButterfly: + info.uses_subgroup_invocation_id = true; + break; case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::GetCbufU16: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3d9f04d1a..27e61a5f9 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -56,6 +56,7 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + bool uses_subgroup_invocation_id{}; std::array loads_generics{}; bool loads_position{}; From c7c518e280d1ac04adb08d45145690fd06ac7b18 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 23:41:55 +0100 Subject: [PATCH 102/770] shader: Implement TLD4 and TLD4_B --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 8 + .../backend/spirv/emit_spirv_image.cpp | 40 ++++ .../frontend/ir/ir_emitter.cpp | 13 ++ .../frontend/ir/ir_emitter.h | 6 + src/shader_recompiler/frontend/ir/modifiers.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 6 + .../frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_gather.cpp | 209 ++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 8 +- .../ir_opt/constant_propagation_pass.cpp | 12 + src/shader_recompiler/ir_opt/texture_pass.cpp | 10 + 13 files changed, 315 insertions(+), 11 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 4161783c8..1f286484c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -124,6 +124,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/texture_gather.cpp frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index a233a4817..4f945b917 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -340,10 +340,14 @@ Id EmitBindlessImageSampleImplicitLod(EmitContext&); Id EmitBindlessImageSampleExplicitLod(EmitContext&); Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); Id 
EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -352,6 +356,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id offset, Id offset2, Id dref); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f75152911..589013773 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -30,6 +30,12 @@ public: } } + explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + } + void Add(spv::ImageOperandsMask new_mask, Id value) { mask = static_cast(static_cast(mask) | static_cast(new_mask)); @@ -98,6 +104,14 @@ Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -114,6 +128,14 @@ Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -152,4 +174,22 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + [[maybe_unused]] Id offset2) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, offset); + return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, + ctx.F32[4], Texture(ctx, index), coords, + ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), + operands.Span()); +} + +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id offset, [[maybe_unused]] Id offset2, Id dref) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, offset); + return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, + ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + } // namespace 
Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 418b7f5ac..b365a8a6e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1474,6 +1474,19 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; + return Inst(op, Flags{info}, handle, coords, offset, offset2); +} + +Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref + : Opcode::BindlessImageGatherDref}; + return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 64738735e..04b43197f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -240,6 +240,12 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info); + + [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 308c00153..4f09a4b39 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -38,6 +38,7 @@ union TextureInstInfo { BitField<8, 1, u32> has_bias; BitField<9, 1, u32> has_lod_clamp; BitField<10, 1, u32> relaxed_precision; + BitField<11, 2, u32> gather_component; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a2479c46a..60a0bc980 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -353,16 +353,22 @@ OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) 
+OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c6cd2a79b..d668dc1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -254,8 +254,8 @@ INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") -INST(TLD4, "TLD4", "1100 10-- --11 1---") -INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4, "TLD4", "1100 10-- ---- ----") +INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3ccd7b925..e59c3326e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,14 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4(u64) { - ThrowNotImplemented(Opcode::TLD4); -} - -void TranslatorVisitor::TLD4_b(u64) { - ThrowNotImplemented(Opcode::TLD4_b); -} - void TranslatorVisitor::TLD4S(u64) { ThrowNotImplemented(Opcode::TLD4S); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..d64865876 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,209 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? 
Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const auto getVector = ([&v](const IR::U32& value) { + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + }); + return {getVector(value1), getVector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + 
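// The operand stream decoded below is implied by the order in which meta_reg
// is consumed (an inference from this translation, not a documented encoding):
//   1. the bindless texture handle, one register, TLD4_b only;
//   2. the packed offsets, one register for AOFFI or two for PTP;
//   3. the depth-compare reference, one register, only when dc is set.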
IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: { + offset = MakeOffset(v, meta_reg, tld4.type); + break; + } + case OffsetType::PTP: { + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + } + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.gather_component.Assign(static_cast(component_type)); + const IR::Value sample{[&]() -> IR::Value { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 61cc314c7..6fe06fda8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -352,14 +352,20 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageGather: + case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: case IR::Opcode::BoundImageSampleDrefImplicitLod: case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageGather: + case IR::Opcode::BoundImageGatherDref: case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleDrefImplicitLod: - case IR::Opcode::ImageSampleDrefExplicitLod: { + case IR::Opcode::ImageSampleDrefExplicitLod: + case IR::Opcode::ImageGather: + case IR::Opcode::ImageGatherDref: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3dab424f6..28060dccf 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -403,6 
+403,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
             return (base >> shift) & ((1U << count) - 1);
         });
         return;
+    case IR::Opcode::BitFieldSExtract:
+        FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
+            const size_t back_shift = static_cast<size_t>(shift) + static_cast<size_t>(count);
+            if (back_shift > Common::BitSize<s32>()) {
+                throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
+                                 base, shift, count);
+            }
+            const size_t left_shift = Common::BitSize<s32>() - back_shift;
+            return static_cast<u32>(static_cast<s32>(base << left_shift) >>
+                                    static_cast<size_t>(Common::BitSize<s32>() - count));
+        });
+        return;
     case IR::Opcode::BranchConditional:
         return FoldBranchConditional(inst);
     default:

diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 2c8164b8a..454ac3e71 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -45,6 +45,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
     case IR::Opcode::BoundImageSampleDrefExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefExplicitLod:
         return IR::Opcode::ImageSampleDrefExplicitLod;
+    case IR::Opcode::BindlessImageGather:
+    case IR::Opcode::BoundImageGather:
+        return IR::Opcode::ImageGather;
+    case IR::Opcode::BindlessImageGatherDref:
+    case IR::Opcode::BoundImageGatherDref:
+        return IR::Opcode::ImageGatherDref;
     default:
         return IR::Opcode::Void;
     }
@@ -56,11 +62,15 @@ bool IsBindless(const IR::Inst& inst) {
     case IR::Opcode::BindlessImageSampleExplicitLod:
     case IR::Opcode::BindlessImageSampleDrefImplicitLod:
     case IR::Opcode::BindlessImageSampleDrefExplicitLod:
+    case IR::Opcode::BindlessImageGather:
+    case IR::Opcode::BindlessImageGatherDref:
         return true;
     case IR::Opcode::BoundImageSampleImplicitLod:
     case IR::Opcode::BoundImageSampleExplicitLod:
     case IR::Opcode::BoundImageSampleDrefImplicitLod:
     case IR::Opcode::BoundImageSampleDrefExplicitLod:
+    case IR::Opcode::BoundImageGather:
+    case IR::Opcode::BoundImageGatherDref:
         return false;
     default:
         throw InvalidArgument("Invalid opcode {}", inst.Opcode());

From fda0835300a7ef6112791ae503435c81ffe883f5 Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Thu, 25 Mar 2021 19:59:35 +0100
Subject: [PATCH 103/770] shader: Implement TLD4S.
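[Note: a standalone reference for the IR::Opcode::BitFieldSExtract constant
fold added in PATCH 102 above. This is a minimal sketch of the same
shift-based sign extension, assuming 32-bit fields; BitFieldSExtract below is
a hypothetical free function written for illustration, not code from the
recompiler.]

    #include <cstdint>
    #include <stdexcept>

    // Extract `count` bits of `base` starting at bit `shift`, then replicate
    // the field's top bit into the high bits. Left-shifting the field up to
    // bit 31 and arithmetic-shifting it back performs the sign extension.
    int32_t BitFieldSExtract(int32_t base, uint32_t shift, uint32_t count) {
        constexpr uint32_t bit_size = 32;
        if (shift + count > bit_size) {
            throw std::logic_error("undefined BitFieldSExtract result");
        }
        if (count == 0) {
            return 0;  // empty field: nothing to extract or sign-extend
        }
        const uint32_t left_shift = bit_size - (shift + count);
        // Shift as unsigned to avoid signed-overflow UB, then reinterpret.
        const int32_t aligned =
            static_cast<int32_t>(static_cast<uint32_t>(base) << left_shift);
        return aligned >> (bit_size - count);  // arithmetic shift sign-extends
    }

    // Example: BitFieldSExtract(0x80, 0, 8) yields -128.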
---
 src/shader_recompiler/CMakeLists.txt              |   1 +
 .../translate/impl/not_implemented.cpp            |   4 -
 .../impl/texture_gather_swizzled.cpp              | 133 ++++++++++++++++++
 3 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 1f286484c..5f3868bfe 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -124,6 +124,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
     frontend/maxwell/translate/impl/texture_fetch.cpp
     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
+    frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
     frontend/maxwell/translate/impl/texture_gather.cpp
     frontend/maxwell/translate/impl/vote.cpp
     frontend/maxwell/translate/impl/warp_shuffle.cpp

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index e59c3326e..788765c21 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -349,10 +349,6 @@ void TranslatorVisitor::TLD_b(u64) {
     ThrowNotImplemented(Opcode::TLD_b);
 }
 
-void TranslatorVisitor::TLD4S(u64) {
-    ThrowNotImplemented(Opcode::TLD4S);
-}
-
 void TranslatorVisitor::TLDS(u64) {
     ThrowNotImplemented(Opcode::TLDS);
 }

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..beab515ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,133 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
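// Reading guide, inferred from the translation below: TLD4S always gathers
// from a 2D texture. Coordinates come from src_reg_a, the packed AOFFI offset
// and/or depth-compare reference from src_reg_b, and the four gathered
// components are written across the dest_reg_a/dest_reg_b pairs, either as
// four F32 registers or packed two-per-register with PackHalf2x16 when the
// F16 precision bit is set.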
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast(tld4s.component_type.Value())); + info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const Encoding tld4s{insn}; + 
v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell From f5672777c8af4700c9e0fc32af52cb2563f564f4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 20:27:09 +0100 Subject: [PATCH 104/770] shader: Implement FragDepth --- src/shader_recompiler/backend/spirv/emit_context.cpp | 6 ++++++ .../frontend/maxwell/translate/impl/exit_program.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 5db4a9082..3d028db0f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -359,6 +359,12 @@ void EmitContext::DefineOutputs(const Info& info) { Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); Name(frag_color[i], fmt::format("frag_color{}", i)); } + if (!info.stores_frag_depth) { + return; + } + frag_depth = DefineOutput(*this, F32[1]); + Decorate(frag_depth, spv::Decoration::BuiltIn, static_cast(spv::BuiltIn::FragDepth)); + Name(frag_depth, "frag_depth"); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index ea9b33da9..58a53c0ec 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -25,7 +25,7 @@ void ExitFragment(TranslatorVisitor& v) { throw NotImplementedException("Sample mask"); } if (sph.ps.omap.depth != 0) { - throw NotImplementedException("Fragment depth"); + v.ir.SetFragDepth(v.F(src_reg + 1)); } } } // Anonymous namespace From 981eb6f43bb88f1e57b4c657bf37cb7471a113e3 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 01:54:03 +0100 Subject: [PATCH 105/770] shader: Fix Array Indices in TEX/TLD4 --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 6 +++--- .../frontend/maxwell/translate/impl/texture_gather.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index d2626f3e7..1f1689c43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -61,11 +61,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -73,7 +73,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case 
TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index d64865876..8c6384040 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -65,11 +65,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -77,7 +77,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } From 742d11c2ad948c8630be15901514ec9e5e5fcd20 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 16:02:04 +0100 Subject: [PATCH 106/770] shader: Implement TLD4.PTP --- .../backend/spirv/emit_context.cpp | 1 + .../backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv.h | 7 +++-- .../backend/spirv/emit_spirv_composite.cpp | 31 +++++++++++++++++-- .../backend/spirv/emit_spirv_image.cpp | 9 ++++-- .../frontend/ir/ir_emitter.cpp | 14 ++++++--- .../frontend/ir/microinstruction.h | 6 ++++ src/shader_recompiler/frontend/ir/modifiers.h | 5 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/ir/value.cpp | 14 +++++++++ src/shader_recompiler/frontend/ir/value.h | 1 + .../maxwell/translate/impl/texture_fetch.cpp | 14 +++++---- .../maxwell/translate/impl/texture_gather.cpp | 16 +++++----- .../ir_opt/constant_propagation_pass.cpp | 18 +++++++++++ src/shader_recompiler/profile.h | 1 + 15 files changed, 111 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3d028db0f..7d8b938d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -169,6 +169,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { AddCapability(spv::Capability::Float64); F64.Define(*this, TypeFloat(64), "f64"); } + array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2"); } void EmitContext::DefineCommonConstants() { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 5ed815c06..0a1e85408 
100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -65,6 +65,7 @@ public: VectorTypes U32; VectorTypes F16; VectorTypes F64; + Id array_U32x2; Id true_value{}; Id false_value{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4f945b917..eaf94dad5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); @@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); @@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); @@ -122,6 +122,7 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 616e63676..0da682859 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,10 +3,15 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + const auto info{inst->Flags()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.U32[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } @@ -42,7 +47,12 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + + const auto info{inst->Flags()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.F16[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); } @@ -78,7 +88,11 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + const auto info{inst->Flags()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.F32[2], e1, e2); + } return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); } @@ -150,4 +164,15 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + const auto info{inst->Flags()}; + if (info.is_constant) { + return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); + } + if (ctx.profile.support_variadic_ptp) { + return OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); + } + return {}; +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 589013773..776afd4ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -30,10 +30,13 @@ public: } } - explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { + explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) { if (Sirit::ValidId(offset)) { Add(spv::ImageOperandsMask::Offset, offset); } + if (Sirit::ValidId(offset2)) { + Add(spv::ImageOperandsMask::ConstOffsets, offset2); + } } void Add(spv::ImageOperandsMask new_mask, Id value) { @@ -177,7 +180,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, [[maybe_unused]] Id offset2) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, offset); + const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), @@ -187,7 +190,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, [[maybe_unused]] Id offset2, Id dref) { const auto info{inst->Flags()}; 
- const ImageOperands operands(ctx, offset); + const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b365a8a6e..f49c30484 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -398,15 +398,16 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: - return Inst(Opcode::CompositeConstructU32x2, e1, e2); + return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); case Type::F16: - return Inst(Opcode::CompositeConstructF16x2, e1, e2); + return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); case Type::F32: - return Inst(Opcode::CompositeConstructF32x2, e1, e2); + return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); case Type::F64: - return Inst(Opcode::CompositeConstructF64x2, e1, e2); + return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); default: ThrowInvalidType(e1.Type()); } @@ -436,6 +437,7 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); @@ -445,6 +447,8 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); case Type::F64: return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + case Type::U32x2: + return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); default: ThrowInvalidType(e1.Type()); } @@ -1481,7 +1485,7 @@ Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Val } Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { + const Value& offset2, const F32& dref, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref : Opcode::BindlessImageGatherDref}; return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 770bbd550..77296cfa4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -99,6 +99,12 @@ public: return ret; } + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { + std::memcpy(&flags, &new_val, sizeof(new_val)); + } + /// Intrusively store the host definition of this instruction. 
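// Flags() and the SetFlags() added above round-trip any trivially copyable
// struct of at most 32 bits through the instruction's raw flags word with
// std::memcpy ([[nodiscard]] on the void-returning setter has no effect).
// Usage sketch under those constraints; ExampleFlags is illustrative only,
// not part of the patch:

struct ExampleFlags {
    bool is_constant{false};
};
static_assert(sizeof(ExampleFlags) <= sizeof(u32));

void MarkConstant(IR::Inst& inst) {
    auto flags{inst.Flags<ExampleFlags>()}; // memcpy the flags word out
    flags.is_constant = true;
    inst.SetFlags(flags); // memcpy it back in
}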
template void SetDefinition(DefinitionType def) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..20fb14fea 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -32,6 +32,11 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +struct CompositeDecoration { + bool is_constant{false}; +}; +static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); + union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 60a0bc980..0dc0aabdf 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -126,6 +126,7 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..7671fc3d8 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -44,6 +44,20 @@ bool Value::IsEmpty() const noexcept { return type == Type::Void; } +bool Value::IsConstantContainer() const { + if (IsImmediate()) { + return true; + } + ValidateAccess(Type::Opaque); + auto num_args = inst->NumArgs(); + for (size_t i = 0; i < num_args; i++) { + if (!inst->Arg(i).IsConstantContainer()) { + return false; + } + } + return true; +} + bool Value::IsImmediate() const noexcept { if (IsIdentity()) { return inst->Arg(0).IsImmediate(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..5d6e74c14 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -38,6 +38,7 @@ public: [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; + [[nodiscard]] bool IsConstantContainer() const; [[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 1f1689c43..b2da079f9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { switch (type) { case TextureType::_1D: case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); case TextureType::_2D: case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); case TextureType::_3D: case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, 
v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); case TextureType::CUBE: case TextureType::ARRAY_CUBE: throw NotImplementedException("Illegal offset on CUBE sample"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index 8c6384040..cdf5cb5c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const auto getVector = ([&v](const IR::U32& value) { + const IR::U32 bitsize = v.ir.Imm32(6); + const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); }); - return {getVector(value1), getVector(value2)}; + return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), + getVector(value2, 0), getVector(value2, 16)); } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy break; } case OffsetType::PTP: { - std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + offset2 = MakeOffsetPTP(v, meta_reg); break; } default: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 28060dccf..12159e738 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -355,6 +355,17 @@ void FoldBranchConditional(IR::Inst& inst) { } } +void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { + for (size_t i = 0; i < amount; i++) { + if (!inst.Arg(i).IsConstantContainer()) { + return; + } + } + auto info{inst.Flags()}; + info.is_constant = true; + inst.SetFlags(info); +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -380,6 +391,13 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); + case IR::Opcode::CompositeConstructU32x2: + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF32x2: + case IR::Opcode::CompositeConstructF64x2: + return FoldConstantComposite(inst, 2); + case IR::Opcode::CompositeConstructArrayU32x2: + return 
FoldConstantComposite(inst, 4); case IR::Opcode::FPMul32: return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 41550bfc6..64031f49c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -30,6 +30,7 @@ struct Profile { bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; bool support_vote{}; + bool support_variadic_ptp{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead From b5db38f50e9f81964bf0cc946e4ed5b00fe564d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 19:24:50 +0100 Subject: [PATCH 107/770] shader: Add IR opcode for ImageFetch --- .../backend/spirv/emit_spirv.h | 4 +++ .../backend/spirv/emit_spirv_composite.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 32 +++++++++++++++++-- .../frontend/ir/ir_emitter.cpp | 6 ++++ .../frontend/ir/ir_emitter.h | 8 +++-- src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++ src/shader_recompiler/ir_opt/texture_pass.cpp | 5 +++ 7 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index eaf94dad5..cc02f53f1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -343,12 +343,14 @@ Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); Id EmitBindlessImageGather(EmitContext&); Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); Id EmitBoundImageGather(EmitContext&); Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -361,6 +363,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 0da682859..f01d69d91 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -170,7 +170,7 @@ Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); } if (ctx.profile.support_variadic_ptp) { - return OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); + return ctx.OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); } return {}; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 776afd4ab..13bc8831f 100644 
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -39,6 +39,18 @@ public: } } + explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id lod, Id ms) { + if (Sirit::ValidId(lod)) { + Add(spv::ImageOperandsMask::Lod, lod); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (Sirit::ValidId(ms)) { + Add(spv::ImageOperandsMask::Sample, ms); + } + } + void Add(spv::ImageOperandsMask new_mask, Id value) { mask = static_cast(static_cast(mask) | static_cast(new_mask)); @@ -115,6 +127,10 @@ Id EmitBindlessImageGatherDref(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -139,6 +155,10 @@ Id EmitBoundImageGatherDref(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -178,7 +198,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - [[maybe_unused]] Id offset2) { + Id offset2) { const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, @@ -188,11 +208,19 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id offset, [[maybe_unused]] Id offset2, Id dref) { + Id offset, Id offset2, Id dref) { const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, offset, lod, ms); + return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], + Texture(ctx, index), coords, operands.Mask(), operands.Span()); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f49c30484..b8d36f362 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1491,6 +1491,12 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); } +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; + return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 04b43197f..446fd7785 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -243,8 +243,12 @@ public: [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); - [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info); + [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, + const Value& offset, const Value& offset2, const F32& dref, + TextureInstInfo info); + + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0dc0aabdf..3dacd7b6b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -356,6 +356,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -363,6 +364,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -370,6 +372,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 454ac3e71..0167dd06e 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -51,6 +51,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BoundImageGatherDref: return IR::Opcode::ImageGatherDref; + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BoundImageFetch: + return IR::Opcode::ImageFetch; default: return IR::Opcode::Void; } @@ -64,6 +67,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageSampleDrefExplicitLod: case IR::Opcode::BindlessImageGather: case 
IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -71,6 +75,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageSampleDrefExplicitLod: case IR::Opcode::BoundImageGather: case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); From d9c5bd9509e82fcde72c18663989931f97ed6518 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 16:46:07 -0300 Subject: [PATCH 108/770] shader: Refactor PTP and other minor changes --- .../backend/spirv/emit_context.cpp | 20 +++++----- .../backend/spirv/emit_context.h | 1 - .../backend/spirv/emit_spirv.h | 13 +++---- .../backend/spirv/emit_spirv_composite.cpp | 30 ++------------- .../backend/spirv/emit_spirv_image.cpp | 38 ++++++++++++++----- .../frontend/ir/ir_emitter.cpp | 12 ++---- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/modifiers.h | 5 --- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - src/shader_recompiler/frontend/ir/value.cpp | 14 ------- src/shader_recompiler/frontend/ir/value.h | 1 - .../maxwell/translate/impl/texture_gather.cpp | 28 +++++++------- .../ir_opt/constant_propagation_pass.cpp | 22 +---------- src/shader_recompiler/profile.h | 1 - 14 files changed, 67 insertions(+), 123 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7d8b938d1..50793b5bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -169,7 +169,6 @@ void EmitContext::DefineCommonTypes(const Info& info) { AddCapability(spv::Capability::Float64); F64.Define(*this, TypeFloat(64), "f64"); } - array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2"); } void EmitContext::DefineCommonConstants() { @@ -352,20 +351,19 @@ void EmitContext::DefineOutputs(const Info& info) { } } if (stage == Stage::Fragment) { - for (size_t i = 0; i < 8; ++i) { - if (!info.stores_frag_color[i]) { + for (u32 index = 0; index < 8; ++index) { + if (!info.stores_frag_color[index]) { continue; } - frag_color[i] = DefineOutput(*this, F32[4]); - Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); - Name(frag_color[i], fmt::format("frag_color{}", i)); + frag_color[index] = DefineOutput(*this, F32[4]); + Decorate(frag_color[index], spv::Decoration::Location, index); + Name(frag_color[index], fmt::format("frag_color{}", index)); } - if (!info.stores_frag_depth) { - return; + if (info.stores_frag_depth) { + frag_depth = DefineOutput(*this, F32[1]); + Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); + Name(frag_depth, "frag_depth"); } - frag_depth = DefineOutput(*this, F32[1]); - Decorate(frag_depth, spv::Decoration::BuiltIn, static_cast(spv::BuiltIn::FragDepth)); - Name(frag_depth, "frag_depth"); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 0a1e85408..5ed815c06 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -65,7 +65,6 @@ public: VectorTypes U32; VectorTypes F16; VectorTypes F64; - Id array_U32x2; Id true_value{}; Id false_value{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h 
index cc02f53f1..4da1f3707 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); @@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); @@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); @@ -122,7 +122,6 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -359,10 +358,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - Id offset2); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id offset, Id offset2, Id dref); + const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id 
coords, Id offset, Id lod, Id ms); Id EmitVoteAll(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index f01d69d91..079e226de 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -7,11 +7,7 @@ namespace Shader::Backend::SPIRV { -Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { - const auto info{inst->Flags()}; - if (info.is_constant) { - return ctx.ConstantComposite(ctx.U32[2], e1, e2); - } +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } @@ -47,12 +43,7 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } -Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { - - const auto info{inst->Flags()}; - if (info.is_constant) { - return ctx.ConstantComposite(ctx.F16[2], e1, e2); - } +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); } @@ -88,11 +79,7 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } -Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { - const auto info{inst->Flags()}; - if (info.is_constant) { - return ctx.ConstantComposite(ctx.F32[2], e1, e2); - } +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); } @@ -164,15 +151,4 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } -Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { - const auto info{inst->Flags()}; - if (info.is_constant) { - return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); - } - if (ctx.profile.support_variadic_ptp) { - return ctx.OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); - } - return {}; -} - } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 13bc8831f..b6e9d3c0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -30,16 +30,34 @@ public: } } - explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) { - if (Sirit::ValidId(offset)) { - Add(spv::ImageOperandsMask::Offset, offset); + explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { + if (offset2.IsEmpty()) { + if (offset.IsEmpty()) { + return; + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + return; } - if (Sirit::ValidId(offset2)) { - Add(spv::ImageOperandsMask::ConstOffsets, offset2); + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + throw NotImplementedException("Not all arguments in PTP are immediate"); } + const IR::Opcode opcode{values[0]->Opcode()}; + if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](int a, 
int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + + const Id offsets{ + ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), + ctx.ConstantComposite(ctx.U32[2], read(0, 0), read(0, 1)), + ctx.ConstantComposite(ctx.U32[2], read(0, 2), read(0, 3)), + ctx.ConstantComposite(ctx.U32[2], read(1, 0), read(1, 1)), + ctx.ConstantComposite(ctx.U32[2], read(1, 2), read(1, 3)))}; + Add(spv::ImageOperandsMask::ConstOffsets, offsets); } - explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id lod, Id ms) { + explicit ImageOperands(Id offset, Id lod, Id ms) { if (Sirit::ValidId(lod)) { Add(spv::ImageOperandsMask::Lod, lod); } @@ -197,8 +215,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - Id offset2) { +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2) { const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, @@ -208,7 +226,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id offset, Id offset2, Id dref) { + const IR::Value& offset, const IR::Value& offset2, Id dref) { const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, @@ -218,7 +236,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, offset, lod, ms); + const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, operands.Mask(), operands.Span()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b8d36f362..0296f8773 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -398,16 +398,15 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } - CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: - return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructU32x2, e1, e2); case Type::F16: - return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF16x2, e1, e2); case Type::F32: - return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF32x2, e1, e2); case Type::F64: - return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF64x2, e1, e2); default: ThrowInvalidType(e1.Type()); } @@ -437,7 +436,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu throw InvalidArgument("Mismatching types {}, {}, {}, and 
{}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } - CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); @@ -447,8 +445,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); case Type::F64: return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); - case Type::U32x2: - return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); default: ThrowInvalidType(e1.Type()); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 77296cfa4..6658dc674 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -101,8 +101,8 @@ public: template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { - std::memcpy(&flags, &new_val, sizeof(new_val)); + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); } /// Intrusively store the host definition of this instruction. diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 20fb14fea..4f09a4b39 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -32,11 +32,6 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); -struct CompositeDecoration { - bool is_constant{false}; -}; -static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); - union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 3dacd7b6b..e12b92c47 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -126,7 +126,6 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) -OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 7671fc3d8..e8e4662e7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -44,20 +44,6 @@ bool Value::IsEmpty() const noexcept { return type == Type::Void; } -bool Value::IsConstantContainer() const { - if (IsImmediate()) { - return true; - } - ValidateAccess(Type::Opaque); - auto num_args = inst->NumArgs(); - for (size_t i = 0; i < num_args; i++) { - if (!inst->Arg(i).IsConstantContainer()) { - return false; - } - } - return true; -} - bool Value::IsImmediate() const noexcept { if (IsIdentity()) { return inst->Arg(0).IsImmediate(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 5d6e74c14..b27601e70 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -38,7 +38,6 @@ public: [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; - [[nodiscard]] bool IsConstantContainer() const; [[nodiscard]] IR::Inst* Inst() const; 
[[nodiscard]] IR::Block* Label() const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index cdf5cb5c4..b2f9cda46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const IR::U32 bitsize = v.ir.Imm32(6); - const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { - return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), - v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); - }); - return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), - getVector(value2, 0), getVector(value2, 16)); + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -150,14 +150,12 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy switch (offset_type) { case OffsetType::None: break; - case OffsetType::AOFFI: { + case OffsetType::AOFFI: offset = MakeOffset(v, meta_reg, tld4.type); break; - } - case OffsetType::PTP: { - offset2 = MakeOffsetPTP(v, meta_reg); + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); break; - } default: throw NotImplementedException("Invalid offset type {}", offset_type); } @@ -167,7 +165,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy IR::TextureInstInfo info{}; info.type.Assign(GetType(tld4.type, tld4.dc != 0)); info.gather_component.Assign(static_cast(component_type)); - const IR::Value sample{[&]() -> IR::Value { + const IR::Value sample{[&] { if (tld4.dc == 0) { return v.ir.ImageGather(handle, coords, offset, offset2, info); } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 12159e738..052f1609b 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -355,17 +355,6 @@ void FoldBranchConditional(IR::Inst& inst) { } } -void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { - for (size_t i = 0; i < amount; i++) { - if (!inst.Arg(i).IsConstantContainer()) { - return; - } - } - auto info{inst.Flags()}; - info.is_constant = true; - inst.SetFlags(info); -} - void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -391,13 +380,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF32: case IR::Opcode::SelectF64: return FoldSelect(inst); - case 
IR::Opcode::CompositeConstructU32x2: - case IR::Opcode::CompositeConstructF16x2: - case IR::Opcode::CompositeConstructF32x2: - case IR::Opcode::CompositeConstructF64x2: - return FoldConstantComposite(inst, 2); - case IR::Opcode::CompositeConstructArrayU32x2: - return FoldConstantComposite(inst, 4); case IR::Opcode::FPMul32: return FoldFPMul32(inst); case IR::Opcode::LogicalAnd: @@ -423,12 +405,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BitFieldSExtract: FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { - const size_t back_shift = static_cast(shift) + static_cast(count); + const size_t back_shift{static_cast(shift) + static_cast(count)}; if (back_shift > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } - const size_t left_shift = Common::BitSize() - back_shift; + const size_t left_shift{Common::BitSize() - back_shift}; return static_cast(static_cast(base << left_shift) >> static_cast(Common::BitSize() - count)); }); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 64031f49c..41550bfc6 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -30,7 +30,6 @@ struct Profile { bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; bool support_vote{}; - bool support_variadic_ptp{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: [PATCH 109/770] shader: Implement TXQ and fix FragDepth --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 8 +- .../backend/spirv/emit_context.h | 3 +- .../backend/spirv/emit_spirv.cpp | 10 +- .../backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_image.cpp | 48 +++++++++- src/shader_recompiler/environment.h | 3 + .../frontend/ir/ir_emitter.cpp | 6 ++ .../frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../translate/impl/not_implemented.cpp | 8 -- .../maxwell/translate/impl/texture_query.cpp | 76 +++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 20 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 92 +++++++++++++++++++ 15 files changed, 264 insertions(+), 21 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 5f3868bfe..7f8dc8eed 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -126,6 +126,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 50793b5bf..c2d13f97c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -244,8 +244,9 @@ void EmitContext::DefineTextures(const 
Info& info, u32& binding) { if (desc.count != 1) { throw NotImplementedException("Array of textures"); } - const Id type{TypeSampledImage(ImageType(*this, desc))}; - const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; + const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{TypeSampledImage(image_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -254,7 +255,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { // TODO: Pass count info textures.push_back(TextureDefinition{ .id{id}, - .type{type}, + .sampled_type{sampled_type}, + .image_type{image_type}, }); } binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 5ed815c06..0cb411a0e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -31,7 +31,8 @@ private: struct TextureDefinition { Id id; - Id type; + Id sampled_type; + Id image_type; }; struct UniformDefinitions { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index cee72f50d..4bed16e7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -126,10 +126,10 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } -void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { +void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (env.ShaderStage()) { + switch (program.stage) { case Shader::Stage::Compute: { const std::array workgroup_size{env.WorkgroupSize()}; execution_model = spv::ExecutionModel::GLCompute; @@ -143,6 +143,9 @@ void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { case Shader::Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + if (program.info.stores_frag_depth) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } break; default: throw NotImplementedException("Stage {}", env.ShaderStage()); @@ -235,6 +238,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); + ctx.AddCapability(spv::Capability::ImageQuery); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { @@ -267,7 +271,7 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program u32& binding) { EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(env, ctx, main); + DefineEntryPoint(env, program, ctx, main); if (profile.support_float_controls) { ctx.AddExtension("SPV_KHR_float_controls"); SetupDenormControl(profile, program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4da1f3707..b82b16e9d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -343,6 +343,7 @@ Id 
EmitBindlessImageSampleDrefExplicitLod(EmitContext&); Id EmitBindlessImageGather(EmitContext&); Id EmitBindlessImageGatherDref(EmitContext&); Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -350,6 +351,7 @@ Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); Id EmitBoundImageGather(EmitContext&); Id EmitBoundImageGatherDref(EmitContext&); Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -364,6 +366,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index b6e9d3c0c..3ea0011aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -91,7 +91,15 @@ private: Id Texture(EmitContext& ctx, const IR::Value& index) { if (index.IsImmediate()) { const TextureDefinition def{ctx.textures.at(index.U32())}; - return ctx.OpLoad(def.type, def.id); + return ctx.OpLoad(def.sampled_type, def.id); + } + throw NotImplementedException("Indirect texture sample"); +} + +Id TextureImage(EmitContext& ctx, const IR::Value& index) { + if (index.IsImmediate()) { + const TextureDefinition def{ctx.textures.at(index.U32())}; + return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } throw NotImplementedException("Indirect texture sample"); } @@ -149,6 +157,10 @@ Id EmitBindlessImageFetch(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -177,6 +189,10 @@ Id EmitBoundImageFetch(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -241,4 +257,34 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c Texture(ctx, index), coords, operands.Mask(), operands.Span()); } +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { + const auto info{inst->Flags()}; + const Id image{TextureImage(ctx, index)}; + const Id zero{ctx.u32_zero_value}; + const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::Shadow1D: + return ctx.OpCompositeConstruct(ctx.U32[4], 
ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), + zero, zero, mips()); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + case TextureType::ShadowArray1D: + case TextureType::Shadow2D: + case TextureType::ShadowCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), + zero, mips()); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + case TextureType::ShadowArray2D: + case TextureType::Shadow3D: + case TextureType::ShadowArrayCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), + mips()); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 6dec4b255..0c62c1c54 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -4,6 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" #include "shader_recompiler/stage.h" namespace Shader { @@ -14,6 +15,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; [[nodiscard]] virtual std::array WorkgroupSize() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0296f8773..f281c023f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1493,6 +1493,12 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageQueryDimensions + : Opcode::BindlessImageQueryDimensions}; + return Inst(op, handle, lod); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 446fd7785..771c186d4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -239,6 +239,7 @@ public: const F32& dref, const F32& lod, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e12b92c47..5d7462d76 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -356,6 +356,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -364,6 +365,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -372,6 +374,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, U32, OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 788765c21..96ee2e741 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -373,14 +373,6 @@ void TranslatorVisitor::TXD_b(u64) { ThrowNotImplemented(Opcode::TXD_b); } -void TranslatorVisitor::TXQ(u64) { - ThrowNotImplemented(Opcode::TXQ); -} - -void TranslatorVisitor::TXQ_b(u64) { - ThrowNotImplemented(Opcode::TXQ_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..e8ea8faeb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast<u32>(txq.cbuf_offset)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6fe06fda8..80ca8db26 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -365,7 +365,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefImplicitLod: case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: { + case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 0167dd06e..dfacf848f 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -54,6 +54,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BindlessImageFetch: case IR::Opcode::BoundImageFetch: return IR::Opcode::ImageFetch; + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BindlessImageQueryDimensions: + return IR::Opcode::ImageQueryDimensions; default: return IR::Opcode::Void; } @@ -68,6 +71,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -76,6 +80,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGather: case 
IR::Opcode::BoundImageGatherDref: case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); @@ -198,13 +203,20 @@ void TexturePass(Environment& env, IR::Program& program) { for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + + const auto& cbuf{texture_inst.cbuf}; + auto flags{inst->Flags()}; + if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + inst->SetFlags(flags); + } const u32 index{descriptors.Add(TextureDescriptor{ - .type{inst->Flags().type}, - .cbuf_index{texture_inst.cbuf.index}, - .cbuf_offset{texture_inst.cbuf.offset}, + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, .count{1}, })}; - inst->ReplaceOpcode(IndexedInstruction(*inst)); inst->SetArg(0, IR::Value{index}); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 504b8c9d6..30d424346 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -25,6 +25,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -45,6 +46,10 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | static_cast(offset); +} + class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; @@ -101,15 +106,21 @@ public: const auto data{std::make_unique(code_size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + const u64 num_texture_types{static_cast(texture_types.size())}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); @@ -147,10 +158,47 @@ protected: return std::nullopt; } + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, + u32 cbuf_offset) { + const u32 raw{cbuf_offset < cbuf_size ? 
gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + + const Shader::TextureType result{[&] { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + throw Shader::NotImplementedException("Texture buffer"); + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } + }()}; + texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + return result; + } + Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector code; + std::unordered_map texture_types; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -176,29 +224,45 @@ public: switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; + stage_index = 0; break; case Maxwell::ShaderProgram::VertexB: stage = Shader::Stage::VertexB; + stage_index = 0; break; case Maxwell::ShaderProgram::TesselationControl: stage = Shader::Stage::TessellationControl; + stage_index = 1; break; case Maxwell::ShaderProgram::TesselationEval: stage = Shader::Stage::TessellationEval; + stage_index = 2; break; case Maxwell::ShaderProgram::Geometry: stage = Shader::Stage::Geometry; + stage_index = 3; break; case Maxwell::ShaderProgram::Fragment: stage = Shader::Stage::Fragment; + stage_index = 4; break; default: UNREACHABLE_MSG("Invalid program={}", program); + break; } } ~GraphicsEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{maxwell3d->regs}; + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, + cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } @@ -209,6 +273,7 @@ public: private: Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; }; class ComputeEnvironment final : public GenericEnvironment { @@ -224,6 +289,15 @@ public: ~ComputeEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, + cbuf.Address(), cbuf.size, cbuf_index, 
cbuf_offset); + } + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } @@ -278,7 +352,9 @@ class FileEnvironment final : public Shader::Environment { public: void Deserialize(std::ifstream& file) { u64 code_size{}; + u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -286,6 +362,13 @@ public: .read(reinterpret_cast(&stage), sizeof(stage)); code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u64 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { @@ -300,6 +383,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -310,6 +401,7 @@ public: private: std::unique_ptr code; + std::unordered_map texture_types; std::array workgroup_size{}; u32 texture_bound{}; u32 read_lowest{}; From ec005be99d4f231f6d4d812841c84ab4af4204a6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:55:07 -0300 Subject: [PATCH 110/770] shader: Fix rasterizer integration order issues --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a87ed1976..82536b9d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -139,7 +139,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector samplers; texture_cache->SynchronizeGraphicsDescriptors(); - texture_cache->UpdateRenderTargets(false); const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; @@ -181,13 +180,17 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + texture_cache->UpdateRenderTargets(false); + scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); + + scheduler->BindGraphicsPipeline(*pipeline); + if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - scheduler->BindGraphicsPipeline(*pipeline); scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); 
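The reordering above is the core of this fix: pushing image descriptors can create or invalidate entries in the texture cache, so render targets are now updated and the render pass requested only after every stage's descriptors have been gathered, and the pipeline is bound before the descriptor set is committed. A condensed sketch of the resulting Configure() order (a simplified reading of the hunk above; PushAllImageDescriptors and CommitDescriptorSet are hypothetical stand-ins for the per-stage loop and the Commit/Send/Record sequence shown in the diff):

    void GraphicsPipeline::Configure(bool is_indexed) {
        texture_cache->SynchronizeGraphicsDescriptors();
        PushAllImageDescriptors();                  // hypothetical helper: may create or
                                                    // invalidate cached images
        texture_cache->UpdateRenderTargets(false);  // render targets are stable from here on
        scheduler->RequestRenderpass(texture_cache->GetFramebuffer());
        scheduler->BindGraphicsPipeline(*pipeline); // bind before committing descriptor sets
        if (descriptor_set_layout) {
            CommitDescriptorSet();                  // hypothetical helper: Commit() + Send()
        }
    }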
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8f63a7591..d7d9927dd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -178,7 +178,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); UpdateDynamicStates(); const auto& regs{maxwell3d.regs}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 1c6ba7289..b2dcd74ab 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -56,15 +56,12 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector<VkAttachmentDescription, 9> descriptions; - u32 num_images{0}; - for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; if (format == PixelFormat::Invalid) { continue; } descriptions.push_back(AttachmentDescription(*device, format, key.samples)); - ++num_images; } const size_t num_colors{descriptions.size()}; const VkAttachmentReference* depth_attachment{}; @@ -89,7 +86,7 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pNext = nullptr, .flags = 0, .attachmentCount = static_cast<u32>(descriptions.size()), - .pAttachments = descriptions.data(), + .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, .dependencyCount = 0, From cdf0cc38698bf31773edd0016d5171bd11b966d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 04:19:38 +0100 Subject: [PATCH 111/770] shader: Fix TXQ --- .../frontend/maxwell/translate/impl/texture_query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index e8ea8faeb..8c7e04bca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -66,7 +66,7 @@ void TranslatorVisitor::TXQ(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const txq{insn}; - Impl(*this, insn, static_cast<u32>(txq.cbuf_offset)); + Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); } void TranslatorVisitor::TXQ_b(u64 insn) { Impl(*this, insn, std::nullopt); From a806b29cb9bb48c4a9628700946231c9150463b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:54:32 -0300 Subject: [PATCH 112/770] shader: Fix structured control flow on KIL instructions This could potentially leave unvisited blocks, leading to illegal phi nodes.
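Previously the lowering of a kill emitted the demote and then cleared the insertion block, so any statements left in the enclosing tree had no block to land in; those blocks were never visited, and phi nodes referencing them became illegal. A minimal sketch of the fixed lowering, assuming the shape of the surrounding visitor (the case label is an assumption, since it sits outside the quoted hunk; the three statements in the body appear verbatim in the diff below):

    case StatementType::Kill: {  // assumed label, not shown in the hunk
        // Route control flow into a fresh merge block instead of dropping the
        // insertion point, so the successor stays reachable and gets visited.
        IR::Block* demote_block{MergeBlock(parent, stmt)};
        IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block);
        current_block = demote_block;
        break;
    }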
--- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f281c023f..82613f607 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -83,6 +83,7 @@ void IREmitter::SelectionMerge(Block* merge_block) { } void IREmitter::Return() { + block->SetReturn(); Inst(Opcode::Return); } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 79e344986..9d4688390 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -152,7 +152,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { switch (stmt->type) { case StatementType::Code: - ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->code->LocationBegin(), stmt->code->LocationEnd(), + reinterpret_cast(stmt->code)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -749,8 +751,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); - current_block = nullptr; + IR::Block* demote_block{MergeBlock(parent, stmt)}; + IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + current_block = demote_block; break; } default: From f0031babeb3ed04aef2468840aa37f4da13b2524 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:55:37 -0300 Subject: [PATCH 113/770] shader: Implement front face --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 +++ src/shader_recompiler/backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/shader_info.h | 1 + 5 files changed, 12 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index c2d13f97c..4d5dabcbf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -302,6 +302,9 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); } } + if (info.loads_front_face) { + front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); + } for (size_t index = 0; index < info.loads_generics.size(); ++index) { if (!info.loads_generics[index]) { continue; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 0cb411a0e..01b7b665d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -94,6 +94,7 @@ public: Id vertex_id{}; Id vertex_index{}; Id base_vertex{}; + Id front_face{}; Id input_position{}; std::array input_generics{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 
8fc040f8b..6fa16eb80 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -156,6 +156,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); } + case IR::Attribute::FrontFace: + return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), + ctx.Constant(ctx.U32[1], std::numeric_limits::max()), + ctx.u32_zero_value); default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 80ca8db26..0ec0d4c01 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -44,6 +44,9 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::VertexId: info.loads_vertex_id = true; break; + case IR::Attribute::FrontFace: + info.loads_front_face = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 27e61a5f9..8ab66bb2a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -62,6 +62,7 @@ struct Info { bool loads_position{}; bool loads_instance_id{}; bool loads_vertex_id{}; + bool loads_front_face{}; std::array stores_frag_color{}; bool stores_frag_depth{}; From cb6039ccea77d35fb829c337fd61451f549e3453 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:56:09 -0300 Subject: [PATCH 114/770] vk_pipeline_cache: Fix pipeline and shader caches --- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 ++++++++++++++----- .../renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30d424346..51c155077 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -62,7 +62,7 @@ public: ~GenericEnvironment() override = default; std::optional Analyze() { - const std::optional size{TryFindSize(start_address)}; + const std::optional size{TryFindSize()}; if (!size) { return std::nullopt; } @@ -71,6 +71,13 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } + void SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); + } + [[nodiscard]] size_t CachedSize() const noexcept { return cached_highest - cached_lowest + INST_SIZE; } @@ -80,7 +87,7 @@ public: } [[nodiscard]] bool CanBeSerialized() const noexcept { - return has_unbound_instructions; + return !has_unbound_instructions; } [[nodiscard]] u128 CalculateHash() const { @@ -95,7 +102,7 @@ public: read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { - return code[address / INST_SIZE]; + return code[(address - cached_lowest) / INST_SIZE]; } has_unbound_instructions = true; return gpu_memory->Read(program_base + address); @@ -117,30 +124,34 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) 
.write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } + file.flush(); } protected: static constexpr size_t INST_SIZE = sizeof(u64); - std::optional TryFindSize(GPUVAddr guest_addr) { + std::optional TryFindSize() { constexpr size_t BLOCK_SIZE = 0x1000; constexpr size_t MAXIMUM_SIZE = 0x100000; constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - size_t offset = 0; - size_t size = BLOCK_SIZE; + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; @@ -623,6 +634,7 @@ bool PipelineCache::RefreshStages() { GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; shader_info = MakeShaderInfo(env, *cpu_shader_addr); } + shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; @@ -707,6 +719,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { GraphicsEnvironment& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + generic_envs.push_back(&env); envs.push_back(&env); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e09d78063..b55e14189 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -172,6 +172,7 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + std::array shader_infos{}; std::unordered_map compute_cache; std::unordered_map graphics_cache; From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: [PATCH 115/770] spirv: Remove dependencies on Environment when generating SPIR-V --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 11 +++++------ src/shader_recompiler/backend/spirv/emit_spirv.h | 5 ++--- src/shader_recompiler/frontend/ir/program.h | 2 ++ src/shader_recompiler/frontend/maxwell/program.cpp | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 10 +++------- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4bed16e7b..2e7e6bb0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -126,12 +126,12 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } -void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& ctx, Id main) { +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; switch (program.stage) { case Shader::Stage::Compute: { - const std::array 
workgroup_size{env.WorkgroupSize()}; + const std::array workgroup_size{program.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); @@ -148,7 +148,7 @@ void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& } break; default: - throw NotImplementedException("Stage {}", env.ShaderStage()); + throw NotImplementedException("Stage {}", program.stage); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -267,11 +267,10 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, - u32& binding) { +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(env, program, ctx, main); + DefineEntryPoint(program, ctx, main); if (profile.support_float_controls) { ctx.AddExtension("SPV_KHR_float_controls"); SetupDenormControl(profile, program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b82b16e9d..837f0e858 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,15 +8,14 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" -#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program, u32& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, + u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 733513c8b..0162e919c 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -19,6 +20,7 @@ struct Program { BlockList post_order_blocks; Info info; Stage stage{}; + std::array workgroup_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0074eb89b..6efaf6ee0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -33,6 +33,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool modules; u32 binding{0}; - env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == u128{}) { continue; @@ -691,11 +690,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - Shader::Environment& env{*envs[env_index]}; - ++env_index; - - const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; - const std::vector code{EmitSPIRV(profile, env, program, binding)}; + const Shader::Profile profile{MakeProfile(key, program.stage)}; + const std::vector code{EmitSPIRV(profile, program, binding)}; 
modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, @@ -753,7 +749,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } From dbd882ddeb1a1a9233c0085d0b8ccb022db385b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: [PATCH 116/770] shader: Better interpolation and disabled attributes support --- .../backend/spirv/emit_context.cpp | 29 +++++++++++++-- .../spirv/emit_spirv_context_get_set.cpp | 29 ++++++++++----- .../frontend/maxwell/program.cpp | 35 +++++++++++++++++++ .../translate/impl/load_store_attribute.cpp | 10 +----- .../ir_opt/collect_shader_info_pass.cpp | 2 +- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 13 ++++++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 ++ 9 files changed, 101 insertions(+), 25 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4d5dabcbf..a8ca33c1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -76,6 +76,8 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { return ctx.TypeVector(ctx.TypeInt(32, true), 4); case AttributeType::UnsignedInt: return ctx.U32[4]; + case AttributeType::Disabled: + break; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -305,15 +307,36 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_front_face) { front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); } - for (size_t index = 0; index < info.loads_generics.size(); ++index) { - if (!info.loads_generics[index]) { + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const InputVarying generic{info.input_generics[index]}; + if (!generic.used) { continue; } - const Id type{GetAttributeType(*this, profile.generic_input_types[index])}; + const AttributeType input_type{profile.generic_input_types[index]}; + if (input_type == AttributeType::Disabled) { + continue; + } + const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + + if (stage != Stage::Fragment) { + continue; + } + switch (generic.interpolation) { + case Interpolation::Smooth: + // Default + // Decorate(id, spv::Decoration::Smooth); + break; + case Interpolation::NoPerspective: + Decorate(id, spv::Decoration::NoPerspective); + break; + case Interpolation::Flat: + Decorate(id, spv::Decoration::Flat); + break; + } } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 6fa16eb80..4cbc2aec1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,16 +10,23 @@ namespace 
Shader::Backend::SPIRV { namespace { -std::tuple AttrTypes(EmitContext& ctx, u32 index) { - const bool is_first_reader{ctx.stage == Stage::VertexB}; +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +std::optional AttrTypes(EmitContext& ctx, u32 index) { const AttributeType type{ctx.profile.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: - return {ctx.input_f32, ctx.F32[1], false}; + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; case AttributeType::UnsignedInt: - return {ctx.input_u32, ctx.U32[1], true}; + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; case AttributeType::SignedInt: - return {ctx.input_s32, ctx.TypeInt(32, true), true}; + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -129,11 +136,15 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const auto [pointer_type, type, needs_cast]{AttrTypes(ctx, index)}; + const std::optional type{AttrTypes(ctx, index)}; + if (!type) { + // Attribute is disabled + return ctx.Constant(ctx.F32[1], 0.0f); + } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{ctx.OpAccessChain(pointer_type, generic_id, element_id())}; - const Id value{ctx.OpLoad(type, pointer)}; - return needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; + const Id value{ctx.OpLoad(type->id, pointer)}; + return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } switch (attr) { case IR::Attribute::PositionX: diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 6efaf6ee0..a914a91f4 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -27,6 +27,40 @@ static void RemoveUnreachableBlocks(IR::Program& program) { }); } +static void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; @@ -51,6 +85,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool loads_generics{}; + std::array input_generics{}; bool loads_position{}; bool loads_instance_id{}; bool loads_vertex_id{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp 
b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 82536b9d6..278509bf0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,10 +221,10 @@ void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineSta } } static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].loads_generics; + const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < state.attributes.size(); ++index) { const auto& attribute = state.attributes[index]; - if (!attribute.enabled || !input_attributes[index]) { + if (!attribute.enabled || !input_attributes[index].used) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 251559b16..69dd945b2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -755,6 +755,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } switch (attr.Type()) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: From 0e1b213fa7fcc19cd76c215986bd606605b3ef60 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 05:26:29 -0300 Subject: [PATCH 117/770] shader: Add missing I2I exception when CC is used --- .../maxwell/translate/impl/integer_to_integer_conversion.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index ca28c6dd9..e8f35552c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -52,10 +52,14 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<13, 1, u64> src_fmt_sign; BitField<41, 3, u64> selector; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<49, 1, u64> abs; BitField<50, 1, u64> sat; } const i2i{insn}; + if (i2i.cc != 0) { + throw NotImplementedException("I2I CC"); + } if (i2i.sat != 0) { throw NotImplementedException("I2I SAT"); } From 51475e21ba5e9a17730a2b5a868dc73d53db9bc1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Mar 2021 19:47:00 -0400 Subject: [PATCH 118/770] shader: Implement VMAD, VMNMX, VSETP --- src/shader_recompiler/CMakeLists.txt | 5 + .../frontend/ir/ir_emitter.cpp | 21 +++-- .../frontend/ir/ir_emitter.h | 2 + .../translate/impl/not_implemented.cpp | 13 --- .../maxwell/translate/impl/video_helper.cpp | 30 ++++++ .../maxwell/translate/impl/video_helper.h | 23 +++++ .../translate/impl/video_minimum_maximum.cpp | 92 +++++++++++++++++++ .../translate/impl/video_multiply_add.cpp | 64 +++++++++++++ .../translate/impl/video_set_predicate.cpp | 92 +++++++++++++++++++ 9 files changed, 319 insertions(+), 23 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h create mode 100644 
src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 7f8dc8eed..55b846c84 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -127,6 +127,11 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp frontend/maxwell/translate/impl/texture_query.cpp + frontend/maxwell/translate/impl/video_helper.cpp + frontend/maxwell/translate/impl/video_helper.h + frontend/maxwell/translate/impl/video_minimum_maximum.cpp + frontend/maxwell/translate/impl/video_multiply_add.cpp + frontend/maxwell/translate/impl/video_set_predicate.cpp frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 82613f607..6d41442ee 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1121,6 +1121,10 @@ U32 IREmitter::UMin(const U32& a, const U32& b) { return Inst(Opcode::UMin32, a, b); } +U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMin(a, b) : UMin(a, b); +} + U32 IREmitter::SMax(const U32& a, const U32& b) { return Inst(Opcode::SMax32, a, b); } @@ -1129,6 +1133,10 @@ U32 IREmitter::UMax(const U32& a, const U32& b) { return Inst(Opcode::UMax32, a, b); } +U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMax(a, b) : UMax(a, b); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } @@ -1267,11 +1275,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { } U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { - if (is_signed) { - return ConvertFToS(bitsize, value); - } else { - return ConvertFToU(bitsize, value); - } + return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); } F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { @@ -1360,11 +1364,8 @@ F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, const Value& value) { - if (is_signed) { - return ConvertSToF(dest_bitsize, src_bitsize, value); - } else { - return ConvertUToF(dest_bitsize, src_bitsize, value); - } + return is_signed ? 
ConvertSToF(dest_bitsize, src_bitsize, value) + : ConvertUToF(dest_bitsize, src_bitsize, value); } U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 771c186d4..8d50aa607 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -196,8 +196,10 @@ public: [[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); [[nodiscard]] U32 SMax(const U32& a, const U32& b); [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 96ee2e741..409216640 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -385,14 +385,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VMAD(u64) { - ThrowNotImplemented(Opcode::VMAD); -} - -void TranslatorVisitor::VMNMX(u64) { - ThrowNotImplemented(Opcode::VMNMX); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } @@ -400,11 +392,6 @@ void TranslatorVisitor::VOTE_vtg(u64) { void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } - -void TranslatorVisitor::VSETP(u64) { - ThrowNotImplemented(Opcode::VSETP); -} - void TranslatorVisitor::VSHL(u64) { ThrowNotImplemented(Opcode::VSHL); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { + +IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, + u32 selector, bool is_signed) { + switch (width) { + case VideoWidth::Byte: + case VideoWidth::Unknown: + return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); + case VideoWidth::Short: + return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); + case VideoWidth::Word: + return value; + default: + throw NotImplementedException("Unknown VideoWidth {}", width); + } +} + +VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { + // immediates must be 16-bit format. + return is_immediate ? 
VideoWidth::Short : width; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +enum class VideoWidth : u64 { + Byte, + Unknown, + Short, + Word, +}; + +[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, + VideoWidth width, u32 selector, bool is_signed); + +[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VideoMinMaxOps : u64 { + MRG_16H, + MRG_16L, + MRG_8B0, + MRG_8B2, + ACC, + MIN, + MAX, +}; + +[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, + VideoMinMaxOps op, bool is_signed) { + switch (op) { + case VideoMinMaxOps::MIN: + return ir.IMin(lhs, rhs, is_signed); + case VideoMinMaxOps::MAX: + return ir.IMax(lhs, rhs, is_signed); + default: + throw NotImplementedException("VMNMX op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VMNMX(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 3, VideoMinMaxOps> op; + BitField<54, 1, u64> dest_sign; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> mx; + } const vmnmx{insn}; + + if (vmnmx.cc != 0) { + throw NotImplementedException("VMNMX CC"); + } + if (vmnmx.sat != 0) { + throw NotImplementedException("VMNMX SAT"); + } + // Selectors were shown to default to 2 in unit tests + if (vmnmx.src_a_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); + } + if (vmnmx.src_b_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); + } + if (vmnmx.src_a_width != VideoWidth::Word) { + throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); + } + + const bool is_b_imm{vmnmx.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? 
ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)};
+
+    const VideoWidth a_width{vmnmx.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmnmx.src_a_sign != 0};
+    const bool src_b_signed{vmnmx.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
+
+    // First operation's sign is only dependent on operand b's sign
+    const bool op_1_signed{src_b_signed};
+
+    const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
+                                    : ir.IMin(op_a, op_b, op_1_signed)};
+    X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+void TranslatorVisitor::VMAD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<47, 1, u64> cc;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+        BitField<51, 2, u64> scale;
+        BitField<53, 1, u64> src_c_neg;
+        BitField<54, 1, u64> src_a_neg;
+        BitField<55, 1, u64> sat;
+    } const vmad{insn};
+
+    if (vmad.cc != 0) {
+        throw NotImplementedException("VMAD CC");
+    }
+    if (vmad.sat != 0) {
+        throw NotImplementedException("VMAD SAT");
+    }
+    if (vmad.scale != 0) {
+        throw NotImplementedException("VMAD SCALE");
+    }
+    if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
+        throw NotImplementedException("VMAD PO");
+    }
+    if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
+        throw NotImplementedException("VMAD NEG");
+    }
+    const bool is_b_imm{vmad.is_src_b_reg == 0};
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
+    const IR::U32 src_c{GetReg39(insn)};
+
+    const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
+    // Immediate values can't have a selector
+    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
+    const VideoWidth a_width{vmad.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vmad.src_a_sign != 0};
+    const bool src_b_signed{vmad.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
+}
+
+} // namespace Shader::Maxwell
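As a rough standalone model of what ExtractVideoOperandValue produces for a byte-wide VMAD source (the helper name and free function below are illustrative, not part of the patch): the selector picks one of the register's four bytes, optionally sign-extended.

    #include <cstdint>

    // 'selector' picks which of the four bytes of a 32-bit register feeds the
    // multiply; is_signed requests sign extension to a full-width integer.
    int64_t ExtractByteOperand(uint32_t reg, uint32_t selector, bool is_signed) {
        const uint32_t byte = (reg >> (selector * 8)) & 0xff;
        return is_signed ? static_cast<int8_t>(byte) : static_cast<int64_t>(byte);
    }

    // e.g. reg = 0x80402010, selector = 3: signed -> -128, unsigned -> 0x80.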
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..af13b3fcc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class VsetpCompareOp : u64 {
+    False = 0,
+    LessThan,
+    Equal,
+    LessThanEqual,
+    GreaterThan = 16,
+    NotEqual,
+    GreaterThanEqual,
+    True,
+};
+
+CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
+    switch (op) {
+    case VsetpCompareOp::False:
+        return CompareOp::False;
+    case VsetpCompareOp::LessThan:
+        return CompareOp::LessThan;
+    case VsetpCompareOp::Equal:
+        return CompareOp::Equal;
+    case VsetpCompareOp::LessThanEqual:
+        return CompareOp::LessThanEqual;
+    case VsetpCompareOp::GreaterThan:
+        return CompareOp::GreaterThan;
+    case VsetpCompareOp::NotEqual:
+        return CompareOp::NotEqual;
+    case VsetpCompareOp::GreaterThanEqual:
+        return CompareOp::GreaterThanEqual;
+    case VsetpCompareOp::True:
+        return CompareOp::True;
+    default:
+        throw NotImplementedException("Invalid compare op {}", op);
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::VSETP(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 3, IR::Pred> dest_pred_b;
+        BitField<3, 3, IR::Pred> dest_pred_a;
+        BitField<20, 16, u64> src_b_imm;
+        BitField<28, 2, u64> src_b_selector;
+        BitField<29, 2, VideoWidth> src_b_width;
+        BitField<36, 2, u64> src_a_selector;
+        BitField<37, 2, VideoWidth> src_a_width;
+        BitField<39, 3, IR::Pred> bop_pred;
+        BitField<42, 1, u64> neg_bop_pred;
+        BitField<43, 5, VsetpCompareOp> compare_op;
+        BitField<45, 2, BooleanOp> bop;
+        BitField<48, 1, u64> src_a_sign;
+        BitField<49, 1, u64> src_b_sign;
+        BitField<50, 1, u64> is_src_b_reg;
+    } const vsetp{insn};
+
+    const bool is_b_imm{vsetp.is_src_b_reg == 0};
+    const IR::U32 src_a{GetReg8(insn)};
+    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
+
+    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
+    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)};
+    const VideoWidth a_width{vsetp.src_a_width};
+    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
+
+    const bool src_a_signed{vsetp.src_a_sign != 0};
+    const bool src_b_signed{vsetp.src_b_sign != 0};
+    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
+    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
+
+    // Compare operation's sign is only dependent on operand b's sign
+    const bool compare_signed{src_b_signed};
+    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
+    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
+    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
+    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
+    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
+    ir.SetPred(vsetp.dest_pred_a, result_a);
+    ir.SetPred(vsetp.dest_pred_b, result_b);
+}
+
+} // namespace Shader::Maxwell

From 84298ce1917da637e7f60ee6c95602a8e7512c8a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 28 Mar 2021 14:08:17 -0400
Subject: [PATCH 119/770] shader: Implement ISCADD CC

---
 .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
index 42fd42bb1..7aef37f54 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -43,7 +43,10 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
 
     v.X(iscadd.dest_reg, result);
     if (iscadd.cc != 0) {
-        throw NotImplementedException("ISCADD CC");
+        v.SetZFlag(v.ir.GetZeroFromOp(result));
+        v.SetSFlag(v.ir.GetSignFromOp(result));
+        v.SetCFlag(v.ir.GetCarryFromOp(result));
+        v.SetOFlag(v.ir.GetOverflowFromOp(result));
     }
 }

From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 28 Mar 2021 19:53:34 -0300
Subject: [PATCH 120/770] shader: Implement LDS, STS, LDL, and STL and use
 SPIR-V 1.4 when available

---
 src/shader_recompiler/CMakeLists.txt          |   2 +
 .../backend/spirv/emit_context.cpp            | 115 +++++++++-
 .../backend/spirv/emit_context.h              |  21 ++
 .../backend/spirv/emit_spirv.h                |  14 ++
 .../spirv/emit_spirv_context_get_set.cpp      |  10 +
 .../spirv/emit_spirv_shared_memory.cpp        | 175 ++++++++++++++++
 src/shader_recompiler/environment.h           |   4 +
 .../frontend/ir/ir_emitter.cpp                |  46 ++++
 .../frontend/ir/ir_emitter.h                  |   6 +
 .../frontend/ir/microinstruction.cpp          |   6 +
 src/shader_recompiler/frontend/ir/opcodes.inc |  18 ++
 src/shader_recompiler/frontend/ir/program.h   |   2 +
 .../frontend/maxwell/program.cpp              |   2 +
 .../impl/load_store_local_shared.cpp          | 197 ++++++++++++++++++
 .../translate/impl/not_implemented.cpp        |  16 --
 .../ir_opt/collect_shader_info_pass.cpp       |   6 +
 src/shader_recompiler/profile.h               |   3 +
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  47 ++++-
 .../vulkan_common/vulkan_device.cpp           |  34 +++
 src/video_core/vulkan_common/vulkan_device.h  |  42 ++--
 20 files changed, 730 insertions(+), 36 deletions(-)
 create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
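Before the diffs, a note on this patch's trickiest piece: when VK_KHR_workgroup_memory_explicit_layout is unavailable, shared memory is declared as a single u32 array, so 8- and 16-bit stores must read-modify-write their containing word with a compare-exchange loop. A host-side C++ sketch of the loop the patch emits in SPIR-V (names and types here are ours; the real code emits OpAtomicCompareExchange):

    #include <atomic>
    #include <cstdint>

    void StoreSubWord(std::atomic<uint32_t>* words, uint32_t byte_offset,
                      uint32_t value, uint32_t bit_mask, uint32_t bit_count) {
        std::atomic<uint32_t>& word = words[byte_offset >> 2];   // containing word
        const uint32_t bit = (byte_offset << 3) & bit_mask;      // mask 24 (u8) / 16 (u16)
        const uint32_t field = ((1u << bit_count) - 1u) << bit;  // 8 or 16 bits wide
        uint32_t old_value = word.load();
        uint32_t new_value;
        do {  // retry until no other invocation touched the word in between
            new_value = (old_value & ~field) | ((value << bit) & field);
        } while (!word.compare_exchange_weak(old_value, new_value));
    }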
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 55b846c84..003cbefb1 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC
     backend/spirv/emit_spirv_logical.cpp
     backend/spirv/emit_spirv_memory.cpp
     backend/spirv/emit_spirv_select.cpp
+    backend/spirv/emit_spirv_shared_memory.cpp
     backend/spirv/emit_spirv_special.cpp
     backend/spirv/emit_spirv_undefined.cpp
     backend/spirv/emit_spirv_warp.cpp
@@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/load_constant.cpp
     frontend/maxwell/translate/impl/load_effective_address.cpp
     frontend/maxwell/translate/impl/load_store_attribute.cpp
+    frontend/maxwell/translate/impl/load_store_local_shared.cpp
    frontend/maxwell/translate/impl/load_store_memory.cpp
     frontend/maxwell/translate/impl/logic_operation.cpp
     frontend/maxwell/translate/impl/logic_operation_three_input.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index a8ca33c1d..96d0e9b4d 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -9,6 +9,7 @@
 #include <fmt/format.h>
 
 #include "common/common_types.h"
+#include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/emit_context.h"
 
 namespace Shader::Backend::SPIRV {
@@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie
 }
 
 EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
-    : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} {
+    : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
     AddCapability(spv::Capability::Shader);
     DefineCommonTypes(program.info);
     DefineCommonConstants();
     DefineInterfaces(program.info);
+    DefineLocalMemory(program);
+    DefineSharedMemory(program);
     DefineConstantBuffers(program.info, binding);
     DefineStorageBuffers(program.info, binding);
     DefineTextures(program.info, binding);
@@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) {
     F32.Define(*this, TypeFloat(32), "f32");
     U32.Define(*this, TypeInt(32, false), "u32");
 
+    private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32");
+
     input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
     input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
     input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32");
@@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) {
     DefineOutputs(info);
 }
 
+void EmitContext::DefineLocalMemory(const IR::Program& program) {
+    if (program.local_memory_size == 0) {
+        return;
+    }
+    const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)};
+    const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
+    const Id pointer{TypePointer(spv::StorageClass::Private, type)};
+    local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private);
+    if (profile.supported_spirv >= 0x00010400) {
+        interfaces.push_back(local_memory);
+    }
+}
+
+void EmitContext::DefineSharedMemory(const IR::Program& program) {
+    if (program.shared_memory_size == 0) {
+        return;
+    }
+    const auto make{[&](Id element_type, u32 element_size) {
+        const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)};
+
const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_pair(variable, element_pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + } + std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; + const Id count{Constant(U32[1], size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), + u32_zero_value, u32_zero_value, new_value, + old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if 
(info.constant_buffer_descriptors.empty()) { return; @@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } std::fill_n(ssbos.data() + index, desc.count, id); index += desc.count; binding += desc.count; @@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { .image_type{image_type}, }); } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } @@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: for (size_t i = 0; i < desc.count; ++i) { cbufs[desc.index + i].*member_type = id; } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 01b7b665d..1a4e8221a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -73,6 +73,14 @@ public: UniformDefinitions uniform_types; + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + Id input_f32{}; Id input_u32{}; Id input_s32{}; @@ -96,6 +104,17 @@ public: Id base_vertex{}; Id front_face{}; + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id input_position{}; std::array input_generics{}; @@ -111,6 +130,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 837f0e858..4f62af959 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); @@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void 
EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
 Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
 Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 4cbc2aec1..52dcef8a4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
 }
 
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
+    const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+    return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) {
+    const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
+    ctx.OpStore(pointer, value);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
new file mode 100644
index 000000000..fa2fc9ab4
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,175 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
+}
+
+Id Word(EmitContext& ctx, Id offset) {
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+    return ctx.OpLoad(ctx.U32[1], pointer);
+}
+
+std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
+    const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
+    const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
+    const Id count_id{ctx.Constant(ctx.U32[1], count)};
+    return {bit, count_id};
+}
+} // Anonymous namespace
+
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
+        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
+    } else {
+        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
+        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
+    }
+}
+
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
+        return ctx.OpLoad(ctx.U32[1], pointer);
+    } else {
+        return Word(ctx, offset);
+    }
+}
+
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
+        return ctx.OpLoad(ctx.U32[2], pointer);
+    } else {
+        const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+        const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+        const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
+        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
+        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
+        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
+                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
+    }
+}
+
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
+    if (ctx.profile.support_explicit_workgroup_layout) {
+        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
+        return ctx.OpLoad(ctx.U32[4], pointer);
+    }
+    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
+    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
+    std::array<Id, 4> values{};
+    for (u32 i = 0; i < 4; ++i) {
+        const Id index{i == 0 ?
base_index + : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? 
base_index
+                               : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
+        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+    }
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h
index 0c62c1c54..9415d02f6 100644
--- a/src/shader_recompiler/environment.h
+++ b/src/shader_recompiler/environment.h
@@ -19,6 +19,10 @@ public:
 
     [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0;
 
+    [[nodiscard]] virtual u32 LocalMemorySize() const = 0;
+
+    [[nodiscard]] virtual u32 SharedMemorySize() const = 0;
+
     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
 
     [[nodiscard]] const ProgramHeader& SPH() const noexcept {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6d41442ee..d6a1d8ec2 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
     Inst(Opcode::WriteGlobal128, address, vector);
 }
 
+U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
+    return Inst<U32>(Opcode::LoadLocal, word_offset);
+}
+
+void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
+    Inst(Opcode::WriteLocal, word_offset, value);
+}
+
+Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
+    switch (bit_size) {
+    case 8:
+        return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
+    case 16:
+        return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
+    case 32:
+        return Inst(Opcode::LoadSharedU32, offset);
+    case 64:
+        return Inst(Opcode::LoadSharedU64, offset);
+    case 128:
+        return Inst(Opcode::LoadSharedU128, offset);
+    }
+    throw InvalidArgument("Invalid bit size {}", bit_size);
+}
+
+void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
+    switch (bit_size) {
+    case 8:
+        Inst(Opcode::WriteSharedU8, offset, value);
+        break;
+    case 16:
+        Inst(Opcode::WriteSharedU16, offset, value);
+        break;
+    case 32:
+        Inst(Opcode::WriteSharedU32, offset, value);
+        break;
+    case 64:
+        Inst(Opcode::WriteSharedU64, offset, value);
+        break;
+    case 128:
+        Inst(Opcode::WriteSharedU128, offset, value);
+        break;
+    default:
+        throw InvalidArgument("Invalid bit size {}", bit_size);
+    }
+}
+
 U1 IREmitter::GetZeroFromOp(const Value& op) {
     return Inst<U1>(Opcode::GetZeroFromOp, op);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 8d50aa607..842c2bdaf 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -99,6 +99,12 @@ public:
     void WriteGlobal64(const U64& address, const IR::Value& vector);
     void WriteGlobal128(const U64& address, const IR::Value& vector);
 
+    [[nodiscard]] U32 LoadLocal(const U32& word_offset);
+    void WriteLocal(const U32& word_offset, const U32& value);
+
+    [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
+    void WriteShared(int bit_size, const U32& offset, const Value& value);
+
     [[nodiscard]] U1 GetZeroFromOp(const Value& op);
     [[nodiscard]] U1 GetSignFromOp(const Value& op);
     [[nodiscard]] U1 GetCarryFromOp(const Value& op);
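As a quick orientation for the emitter API added above, a hypothetical translator fragment (a sketch only; it assumes the surrounding project headers and is not part of the patch):

    // Store the low 16 bits of 'value' at 'offset' in shared memory, then load
    // them back sign-extended; WriteShared/LoadShared pick the U16/S16 opcodes.
    void RoundTrip16(Shader::IR::IREmitter& ir, const Shader::IR::U32& offset,
                     const Shader::IR::U32& value) {
        ir.WriteShared(16, offset, value);
        const Shader::IR::U32 reloaded{ir.LoadShared(16, true, offset)};
        // 'reloaded' now holds value's low halfword, sign-extended to 32 bits.
    }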
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index be8eb4d4c..52a5e5034 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteStorage32:
     case Opcode::WriteStorage64:
     case Opcode::WriteStorage128:
+    case Opcode::WriteLocal:
+    case Opcode::WriteSharedU8:
+    case Opcode::WriteSharedU16:
+    case Opcode::WriteSharedU32:
+    case Opcode::WriteSharedU64:
+    case Opcode::WriteSharedU128:
         return true;
     default:
         return false;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 5d7462d76..c75658328 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32,
 OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
 OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
 
+// Local memory operations
+OPCODE(LoadLocal, U32, U32, )
+OPCODE(WriteLocal, Void, U32, U32, )
+
+// Shared memory operations
+OPCODE(LoadSharedU8, U32, U32, )
+OPCODE(LoadSharedS8, U32, U32, )
+OPCODE(LoadSharedU16, U32, U32, )
+OPCODE(LoadSharedS16, U32, U32, )
+OPCODE(LoadSharedU32, U32, U32, )
+OPCODE(LoadSharedU64, U32x2, U32, )
+OPCODE(LoadSharedU128, U32x4, U32, )
+OPCODE(WriteSharedU8, Void, U32, U32, )
+OPCODE(WriteSharedU16, Void, U32, U32, )
+OPCODE(WriteSharedU32, Void, U32, U32, )
+OPCODE(WriteSharedU64, Void, U32, U32x2, )
+OPCODE(WriteSharedU128, Void, U32, U32x4, )
+
 // Vector utility
 OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
 OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
index 0162e919c..3a37b3ab9 100644
--- a/src/shader_recompiler/frontend/ir/program.h
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -21,6 +21,8 @@ struct Program {
     Info info;
     Stage stage{};
     std::array<u32, 3> workgroup_size{};
+    u32 local_memory_size{};
+    u32 shared_memory_size{};
 };
 
 [[nodiscard]] std::string DumpProgram(const Program& program);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index a914a91f4..7b08f11b0 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
     program.workgroup_size = env.WorkgroupSize();
+    program.local_memory_size = env.LocalMemorySize();
+    program.shared_memory_size = env.SharedMemorySize();
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,197 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Size : u64 {
+    U8,
+    S8,
+    U16,
+    S16,
+    B32,
+    B64,
+    B128,
+};
+
+IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> offset_reg;
+        BitField<20, 24, u64> absolute_offset;
+        BitField<20, 24, s64> relative_offset;
+    } const encoding{insn};
+
+    if (encoding.offset_reg == IR::Reg::RZ) {
+        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
+    } else {
+        const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
+        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+    }
+}
+
+std::pair<int, bool> GetSize(u64 insn) {
+    union {
+        u64 raw;
+        BitField<48, 3, Size> size;
+    } const encoding{insn};
+
+    switch (encoding.size) {
+    case Size::U8:
+        return {8, false};
+    case Size::S8:
+        return {8, true};
+    case Size::U16:
+        return {16, false};
+    case Size::S16:
+        return {16, true};
+    case Size::B32:
+        return {32, false};
+    case Size::B64:
+        return {64, false};
+    case Size::B128:
+        return {128, false};
+    default:
+        throw NotImplementedException("Invalid size {}", encoding.size.Value());
+    }
+}
+
+IR::Reg Reg(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> reg;
+    } const encoding{insn};
+
+    return encoding.reg;
+}
+
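(Aside: the two helpers that follow convert a byte address into a bit position inside its containing 32-bit word. A standalone model that compiles on its own, for checking the masks:)

    #include <cstdint>

    constexpr uint32_t ByteBit(uint32_t offset)  { return (offset << 3) & 24; } // 0, 8, 16, 24
    constexpr uint32_t ShortBit(uint32_t offset) { return (offset << 3) & 16; } // 0 or 16

    static_assert(ByteBit(7) == 24, "byte 3 of its word");
    static_assert(ShortBit(6) == 16, "upper halfword of its word");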
+IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + 
ir.WriteShared(128, offset, vector);
+        break;
+    }
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 409216640..b62d8ee2a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) {
     ThrowNotImplemented(Opcode::LD);
 }
 
-void TranslatorVisitor::LDL(u64) {
-    ThrowNotImplemented(Opcode::LDL);
-}
-
-void TranslatorVisitor::LDS(u64) {
-    ThrowNotImplemented(Opcode::LDS);
-}
-
 void TranslatorVisitor::LEPC(u64) {
     ThrowNotImplemented(Opcode::LEPC);
 }
@@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) {
     ThrowNotImplemented(Opcode::ST);
 }
 
-void TranslatorVisitor::STL(u64) {
-    ThrowNotImplemented(Opcode::STL);
-}
-
 void TranslatorVisitor::STP(u64) {
     ThrowNotImplemented(Opcode::STP);
 }
 
-void TranslatorVisitor::STS(u64) {
-    ThrowNotImplemented(Opcode::STS);
-}
-
 void TranslatorVisitor::SUATOM_cas(u64) {
     ThrowNotImplemented(Opcode::SUATOM_cas);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 60be67228..c932c307b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LoadStorageS8:
     case IR::Opcode::WriteStorageU8:
     case IR::Opcode::WriteStorageS8:
+    case IR::Opcode::LoadSharedU8:
+    case IR::Opcode::LoadSharedS8:
+    case IR::Opcode::WriteSharedU8:
     case IR::Opcode::SelectU8:
     case IR::Opcode::ConvertF16S8:
     case IR::Opcode::ConvertF16U8:
@@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LoadStorageS16:
     case IR::Opcode::WriteStorageU16:
     case IR::Opcode::WriteStorageS16:
+    case IR::Opcode::LoadSharedU16:
+    case IR::Opcode::LoadSharedS16:
+    case IR::Opcode::WriteSharedU16:
     case IR::Opcode::SelectU16:
     case IR::Opcode::BitCastU16F16:
     case IR::Opcode::BitCastF16U16:
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e26047751..0276fc23b 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -18,6 +18,8 @@ enum class AttributeType : u8 {
 };
 
 struct Profile {
+    u32 supported_spirv{0x00010000};
+
     bool unified_descriptor_binding{};
     bool support_vertex_instance_id{};
     bool support_float_controls{};
@@ -30,6 +32,7 @@ struct Profile {
     bool support_fp16_signed_zero_nan_preserve{};
     bool support_fp32_signed_zero_nan_preserve{};
     bool support_fp64_signed_zero_nan_preserve{};
+    bool support_explicit_workgroup_layout{};
     bool support_vote{};
     bool warp_size_potentially_larger_than_guest{};
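Several hunks below key off the new Profile::supported_spirv field, so here is a minimal illustration of the intent (constant and function names here are ours, not the patch's). SPIR-V 1.4 requires OpEntryPoint to list every global variable the entry point uses, not only Input/Output ones, which is why the emitter pushes SSBOs, textures and local memory onto 'interfaces' only when 1.4 is available.

    #include <cstdint>

    // Packed SPIR-V version numbers, in the encoding Sirit::Module expects.
    constexpr uint32_t kSpirv10 = 0x00010000;
    constexpr uint32_t kSpirv14 = 0x00010400;

    // Sketch: gate 1.4-only behaviour on the negotiated version.
    bool MustListAllGlobalsInEntryPoint(uint32_t supported_spirv) {
        return supported_spirv >= kSpirv14;
    }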
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 69dd945b2..0d6a32bfd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -114,10 +114,12 @@ public:
         gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size);
 
         const u64 num_texture_types{static_cast<u64>(texture_types.size())};
+        const u32 local_memory_size{LocalMemorySize()};
         const u32 texture_bound{TextureBoundBuffer()};
 
         file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
             .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
+            .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
             .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
             .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
             .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest))
@@ -132,7 +134,10 @@ public:
         file.flush();
         if (stage == Shader::Stage::Compute) {
             const std::array<u32, 3> workgroup_size{WorkgroupSize()};
-            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size));
+            const u32 shared_memory_size{SharedMemorySize()};
+            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
+                .write(reinterpret_cast<const char*>(&shared_memory_size),
+                       sizeof(shared_memory_size));
         } else {
             file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
         }
@@ -278,6 +283,16 @@ public:
         return maxwell3d->regs.tex_cb_index;
     }
 
+    u32 LocalMemorySize() const override {
+        const u64 size{sph.LocalMemorySize()};
+        ASSERT(size <= std::numeric_limits<u32>::max());
+        return static_cast<u32>(size);
+    }
+
+    u32 SharedMemorySize() const override {
+        throw Shader::LogicError("Requesting shared memory size in graphics stage");
+    }
+
     std::array<u32, 3> WorkgroupSize() const override {
         throw Shader::LogicError("Requesting workgroup size in a graphics stage");
     }
@@ -313,6 +328,16 @@ public:
         return kepler_compute->regs.tex_cb_index;
     }
 
+    u32 LocalMemorySize() const override {
+        const auto& qmd{kepler_compute->launch_description};
+        return qmd.local_pos_alloc;
+    }
+
+    u32 SharedMemorySize() const override {
+        const auto& qmd{kepler_compute->launch_description};
+        return qmd.shared_alloc;
+    }
+
     std::array<u32, 3> WorkgroupSize() const override {
         const auto& qmd{kepler_compute->launch_description};
         return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
@@ -366,6 +391,7 @@ public:
         u64 num_texture_types{};
         file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
             .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
+            .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
            .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
             .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
             .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
@@ -381,7 +407,8 @@ public:
             texture_types.emplace(key, type);
         }
         if (stage == Shader::Stage::Compute) {
-            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size));
+            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
+                .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
         } else {
             file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
         }
@@ -402,6 +429,14 @@ public:
         return it->second;
     }
 
+    u32 LocalMemorySize() const override {
+        return local_memory_size;
+    }
+
+    u32 SharedMemorySize() const override {
+        return shared_memory_size;
+    }
+
     u32 TextureBoundBuffer() const override {
         return texture_bound;
     }
@@ -414,6 +449,8 @@ private:
     std::unique_ptr<u64[]> code;
     std::unordered_map<u32, Shader::TextureType> texture_types;
     std::array<u32, 3> workgroup_size{};
+    u32 local_memory_size{};
+    u32 shared_memory_size{};
     u32 texture_bound{};
     u32 read_lowest{};
     u32 read_highest{};
@@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
     const auto& float_control{device.FloatControlProperties()};
     const VkDriverIdKHR driver_id{device.GetDriverID()};
     base_profile = Shader::Profile{
+        .supported_spirv = device.IsKhrSpirv1_4Supported() ?
0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { shader = MakeShaderInfo(env, *cpu_shader_addr); } const ComputePipelineCacheKey key{ - .unique_hash = shader->unique_hash, - .shared_memory_size = qmd.shared_alloc, + .unique_hash{shader->unique_hash}, + .shared_memory_size{qmd.shared_alloc}, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 009b74f12..c027598ba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -662,6 +676,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -682,6 +697,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -694,6 +710,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { 
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -787,6 +805,22 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_extended_dynamic_state = true; } } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c268a4f8d..ac2311e7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -168,11 +168,21 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. + bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -296,20 +306,22 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. 
+    bool ext_index_type_uint8{};            ///< Support for VK_EXT_index_type_uint8.
+    bool ext_sampler_filter_minmax{};       ///< Support for VK_EXT_sampler_filter_minmax.
+    bool ext_depth_range_unrestricted{};    ///< Support for VK_EXT_depth_range_unrestricted.
+    bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool ext_tooling_info{};                ///< Support for VK_EXT_tooling_info.
+    bool ext_subgroup_size_control{};       ///< Support for VK_EXT_subgroup_size_control.
+    bool ext_transform_feedback{};          ///< Support for VK_EXT_transform_feedback.
+    bool ext_custom_border_color{};         ///< Support for VK_EXT_custom_border_color.
+    bool ext_extended_dynamic_state{};      ///< Support for VK_EXT_extended_dynamic_state.
+    bool ext_shader_stencil_export{};       ///< Support for VK_EXT_shader_stencil_export.
+    bool nv_device_diagnostics_config{};    ///< Support for VK_NV_device_diagnostics_config.
+    bool has_renderdoc{};                   ///< Has RenderDoc attached
+    bool has_nsight_graphics{};             ///< Has Nsight Graphics attached
 
     // Telemetry parameters
     std::string vendor_name; ///< Device's driver name.

From cd9f75e2239666a932861f6d54138febf8736a8c Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 28 Mar 2021 20:16:26 -0400
Subject: [PATCH 121/770] shader: Fix ISCADD logic for PO/CC

---
 .../maxwell/translate/impl/integer_scaled_add.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
index 7aef37f54..93cc2c0b1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -30,23 +30,24 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
         if (iscadd.neg_b != 0) {
             op_b = v.ir.INeg(op_b);
         }
+    } else {
+        // When PO is present, add one
+        op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
     }
 
     // With the operands already processed, scale A
     const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))};
     const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
 
-    IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
-    if (po) {
-        // .PO adds one to the final result
-        result = v.ir.IAdd(result, v.ir.Imm32(1));
-    }
+    const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
 
     v.X(iscadd.dest_reg, result);
     if (iscadd.cc != 0) {
         v.SetZFlag(v.ir.GetZeroFromOp(result));
         v.SetSFlag(v.ir.GetSignFromOp(result));
-        v.SetCFlag(v.ir.GetCarryFromOp(result));
-        v.SetOFlag(v.ir.GetOverflowFromOp(result));
+        const IR::U1 carry{v.ir.GetCarryFromOp(result)};
+        const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
+        v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
+        v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
     }
 }

From 3c758d9b538e957a20ea6db136741ad2bd16406d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 28 Mar 2021 21:55:47 -0300
Subject: [PATCH 122/770] vk_pipeline_cache: Fix size hashing of shaders

---
 .../renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0d6a32bfd..8b2816c13 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -68,7 +68,7 @@ public:
         }
         cached_lowest = start_address;
         cached_highest = start_address + static_cast<u32>(*size);
-        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
+        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
     }
 
     void SetCachedSize(size_t size_bytes) {
@@ -126,12 +126,10 @@ public:
             .write(reinterpret_cast<const char*>(&read_highest), sizeof(read_highest))
             .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
             .write(data.get(), code_size);
-        file.flush();
         for (const auto [key, type] : texture_types) {
             file.write(reinterpret_cast<const char*>(&key), sizeof(key))
                 .write(reinterpret_cast<const char*>(&type), sizeof(type));
         }
-        file.flush();
         if (stage == Shader::Stage::Compute) {
             const std::array<u32, 3> workgroup_size{WorkgroupSize()};
             const u32 shared_memory_size{SharedMemorySize()};
@@ -141,7 +139,6 @@ public:
         } else {
             file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
         }
-        file.flush();
     }
 
 protected:
@@ -161,10 +158,10 @@ protected:
             code.resize(size / INST_SIZE);
             u64* const data = code.data() + offset / INST_SIZE;
             gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
-            for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
-                const u64 inst = data[i / INST_SIZE];
+            for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
+                const u64 inst = data[index / INST_SIZE];
                 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
-                    return offset + i;
+                    return offset + index;
                 }
             }
             guest_addr += BLOCK_SIZE;
@@ -751,7 +748,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() {
             continue;
         }
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-        GraphicsEnvironment& env{graphics_envs[index]};
+        auto& env{graphics_envs[index]};
         const u32 start_address{maxwell3d.regs.shader_config[index].offset};
         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
         env.SetCachedSize(shader_infos[index]->size_bytes);
@@ -771,6 +768,8 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK
     const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
     const auto& qmd{kepler_compute.launch_description};
     ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    env.SetCachedSize(shader->size_bytes);
+
     main_pools.ReleaseContents();
     ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)};
     if (!pipeline_cache_filename.empty()) {

From dbc1e5cde79b9165605741e1ea7158513ef6499f Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sat, 27 Mar 2021 23:01:28 -0400
Subject: [PATCH 123/770] shader: Implement I2I SAT

---
 .../backend/spirv/emit_spirv.h                 |  2 +
 .../backend/spirv/emit_spirv_integer.cpp       |  8 ++++
 .../frontend/ir/ir_emitter.cpp                 |  8 ++++
 .../frontend/ir/ir_emitter.h                   |  2 +
 src/shader_recompiler/frontend/ir/opcodes.inc  |  2 +
 .../impl/integer_to_integer_conversion.cpp     | 40 ++++++++++++++-----
 6 files changed, 52 insertions(+), 10 deletions(-)
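A standalone model of what "SAT" means here, before the diffs (helper names below are ours): instead of truncating a 32-bit source to the destination width, the result is clamped to the destination's representable range. Shown for 8-bit destinations:

    #include <algorithm>
    #include <cstdint>

    // Signed 8-bit destination: clamp into [-128, 127] (0xffffff80..0x7f as u32).
    uint32_t SaturateToS8(int32_t v) {
        return static_cast<uint32_t>(std::clamp(v, -128, 127));
    }

    // Unsigned 8-bit destination: a signed source is first clamped at zero,
    // matching the patch's SMax(zero, src) step, then capped at 0xff.
    uint32_t SaturateToU8(int32_t v, bool src_signed) {
        const int64_t x = src_signed ? std::max(v, 0)
                                     : static_cast<int64_t>(static_cast<uint32_t>(v));
        return static_cast<uint32_t>(std::min<int64_t>(x, 0xff));
    }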
a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4f62af959..af6b8a68f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -287,6 +287,8 @@ Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitUMin32(EmitContext& ctx, Id a, Id b); Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, Id value, Id min, Id max); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index a9c5e9cca..37fc7c7a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -163,6 +163,14 @@ Id EmitUMax32(EmitContext& ctx, Id a, Id b) { return ctx.OpUMax(ctx.U32[1], a, b); } +Id EmitSClamp32(EmitContext& ctx, Id value, Id min, Id max) { + return ctx.OpSClamp(ctx.U32[1], value, min, max); +} + +Id EmitUClamp32(EmitContext& ctx, Id value, Id min, Id max) { + return ctx.OpUClamp(ctx.U32[1], value, min, max); +} + Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d6a1d8ec2..9b898e4e1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1183,6 +1183,14 @@ U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { return is_signed ? SMax(a, b) : UMax(a, b); } +U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::SClamp32, value, min, max); +} + +U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::UClamp32, value, min, max); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 842c2bdaf..269f367a4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -206,6 +206,8 @@ public: [[nodiscard]] U32 SMax(const U32& a, const U32& b); [[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); + [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c75658328..9b050995b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -299,6 +299,8 @@ OPCODE(SMin32, U32, U32, OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SClamp32, U32, U32, U32, U32, ) +OPCODE(UClamp32, U32, U32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) OPCODE(IEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index e8f35552c..98b7f59f7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -30,16 +30,33 @@ enum class IntegerWidth : u64 { [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width) { const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; switch (dst_width) { case IntegerWidth::Byte: - return ir.BitFieldExtract(src, zero, ir.Imm32(8), false); + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; case IntegerWidth::Short: - return ir.BitFieldExtract(src, zero, ir.Imm32(16), false); + min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; case IntegerWidth::Word: - return ir.BitFieldExtract(src, zero, ir.Imm32(32), false); + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; default: throw NotImplementedException("Invalid width {}", dst_width); } + const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? 
ir.SClamp(value, min, max) : ir.UClamp(value, min, max); } void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { @@ -60,9 +77,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (i2i.cc != 0) { throw NotImplementedException("I2I CC"); } - if (i2i.sat != 0) { - throw NotImplementedException("I2I SAT"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -75,15 +89,21 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const s32 selector{static_cast(i2i.selector)}; const IR::U32 offset{v.ir.Imm32(selector * 8)}; const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; - IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)}; - if (i2i.abs) { + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { src_values = v.ir.IAbs(src_values); } - if (i2i.neg) { + if (i2i.neg != 0) { src_values = v.ir.INeg(src_values); } + const IR::U32 result{ + sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; - const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); } } // Anonymous namespace From 73af0d2e0d12d94b1d2dc8c0b448d0769cf111f4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 21:33:52 -0400 Subject: [PATCH 124/770] shader: Implement I2I CC --- .../backend/spirv/emit_spirv.h | 6 +- .../backend/spirv/emit_spirv_integer.cpp | 56 +++++++++++++------ .../impl/integer_to_integer_conversion.cpp | 7 ++- 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index af6b8a68f..204c5f9e0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -276,7 +276,7 @@ Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value); @@ -287,8 +287,8 @@ Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitUMin32(EmitContext& ctx, Id a, Id b); Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, Id value, Id min, Id max); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 
37fc7c7a2..8bf43b91d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -5,6 +5,25 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { + IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); +} +} // Anonymous namespace Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { Id result{}; @@ -19,14 +38,8 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { } else { result = ctx.OpIAdd(ctx.U32[1], a, b); } - if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { - zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); - zero->Invalidate(); - } - if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) { - sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); - sign->Invalidate(); - } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; @@ -114,16 +127,17 @@ Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); } -Id EmitBitFieldSExtract(EmitContext& ctx, Id base, Id offset, Id count) { - return ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { + const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; } Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; - if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { - zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); - zero->Invalidate(); - } + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); return result; } @@ -163,12 +177,18 @@ Id EmitUMax32(EmitContext& ctx, Id a, Id b) { return ctx.OpUMax(ctx.U32[1], a, b); } -Id EmitSClamp32(EmitContext& ctx, Id value, Id min, Id max) { - return ctx.OpSClamp(ctx.U32[1], value, min, max); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + const Id result{ctx.OpSClamp(ctx.U32[1], value, min, max)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; } -Id EmitUClamp32(EmitContext& ctx, Id value, Id min, Id max) { - return ctx.OpUClamp(ctx.U32[1], value, min, max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { + const Id result{ctx.OpUClamp(ctx.U32[1], value, min, max)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, 
result); + return result; } Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index 98b7f59f7..2f1a58805 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -74,9 +74,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<50, 1, u64> sat; } const i2i{insn}; - if (i2i.cc != 0) { - throw NotImplementedException("I2I CC"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -105,6 +102,10 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } } } // Anonymous namespace From 39a379632ea9f5eec9877b53668ebf385d0520bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 01:16:16 -0300 Subject: [PATCH 125/770] shader: Fix alignment checks on RZ --- src/shader_recompiler/frontend/ir/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 3845ec5fb..a4b635792 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -309,7 +309,7 @@ constexpr Reg operator++(Reg& reg, int) { } [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { - return (RegIndex(reg) / align) * align == RegIndex(reg); + return RegIndex(reg) % align == 0 || reg == Reg::RZ; } } // namespace Shader::IR From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: [PATCH 126/770] shader: Implement BRX --- src/shader_recompiler/CMakeLists.txt | 4 + .../backend/spirv/emit_spirv.h | 3 + .../spirv/emit_spirv_context_get_set.cpp | 10 +- .../backend/spirv/emit_spirv_control_flow.cpp | 4 + src/shader_recompiler/environment.h | 2 + .../frontend/ir/ir_emitter.cpp | 12 ++ .../frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../frontend/maxwell/control_flow.cpp | 58 ++++++++-- .../frontend/maxwell/control_flow.h | 7 +- .../maxwell/indirect_branch_table_track.cpp | 108 ++++++++++++++++++ .../maxwell/indirect_branch_table_track.h | 28 +++++ .../frontend/maxwell/instruction.h | 1 + .../maxwell/structured_control_flow.cpp | 57 +++++++++ .../translate/impl/branch_indirect.cpp | 36 ++++++ .../maxwell/translate/impl/load_constant.cpp | 29 +---- .../maxwell/translate/impl/load_constant.h | 39 +++++++ .../translate/impl/not_implemented.cpp | 8 -- .../ir_opt/ssa_rewrite_pass.cpp | 21 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++- 21 files changed, 437 insertions(+), 48 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h 
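The approach taken in this patch: BRX/JMX branch through a table of targets stored in a constant buffer, so the control-flow pass walks backwards from the branch to the IMNMX/SHL/LDC chain that computed the target register, reads every table entry through Environment::ReadCbufValue, and expands the indirect branch into one CFG edge per entry, guarded at runtime by an indirect-branch variable. Below is a minimal, self-contained C++ sketch of that backwards register-tracking idea; it uses toy instruction records rather than real Maxwell encodings, and every type and value in it is illustrative, not part of the patch's actual interfaces.

// indirect_branch_sketch.cpp -- illustrative only; the real pass decodes
// 64-bit Maxwell instructions, not this toy representation.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <span>
#include <vector>

enum class Op { Imnmx, Shl, Ldc, Brx, Other };

struct Inst {
    Op op{};
    int dest{};     // destination register
    int src{};      // source register
    uint32_t imm{}; // immediate operand (clamp bound, shift amount, ...)
};

// Walk backwards from 'pos' looking for the instruction with opcode 'op'
// that defines 'reg'. Returns its index, or nullopt if tracking fails.
std::optional<size_t> TrackDef(std::span<const Inst> code, size_t pos, Op op, int reg) {
    while (pos-- > 0) {
        if (code[pos].op == op && code[pos].dest == reg) {
            return pos;
        }
    }
    return std::nullopt;
}

struct TableInfo {
    uint32_t num_entries{};
};

// A BRX at 'brx_pos' jumps through a table indexed by a clamped register:
//   IMNMX idx, raw, N    -> bounds the index (table has N+1 entries)
//   SHL   off, idx, 2    -> scales it to a byte offset
//   LDC   tgt, cbuf[off] -> loads the branch target
// Follow that chain backwards; any missing link aborts the analysis.
std::optional<TableInfo> TrackTable(std::span<const Inst> code, size_t brx_pos) {
    const auto ldc{TrackDef(code, brx_pos, Op::Ldc, code[brx_pos].src)};
    if (!ldc) return std::nullopt;
    const auto shl{TrackDef(code, *ldc, Op::Shl, code[*ldc].src)};
    if (!shl) return std::nullopt;
    const auto imnmx{TrackDef(code, *shl, Op::Imnmx, code[*shl].src)};
    if (!imnmx) return std::nullopt;
    return TableInfo{.num_entries = code[*imnmx].imm + 1};
}

int main() {
    const std::vector<Inst> code{
        {Op::Imnmx, 3, 2, 7}, // r3 = min(r2, 7): at most 8 table entries
        {Op::Shl, 4, 3, 2},   // r4 = r3 << 2
        {Op::Ldc, 5, 4, 0},   // r5 = cbuf[r4]
        {Op::Brx, 0, 5, 0},   // branch to r5
    };
    if (const auto info{TrackTable(code, 3)}) {
        std::printf("jump table with %u entries\n", info->num_entries);
    }
}

As in this sketch, the real pass gives up whenever any link of the chain cannot be found (TrackIndirectBranchTable returns nullopt and AnalyzeBRX throws "Failed to track indirect branch") or when the clamp bound is negative, which keeps the CFG expansion sound.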
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 003cbefb1..44ab929b7 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/control_flow.h frontend/maxwell/decode.cpp frontend/maxwell/decode.h + frontend/maxwell/indirect_branch_table_track.cpp + frontend/maxwell/indirect_branch_table_track.h frontend/maxwell/instruction.h frontend/maxwell/location.h frontend/maxwell/maxwell.inc @@ -63,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp + frontend/maxwell/translate/impl/branch_indirect.cpp frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/common_funcs.cpp frontend/maxwell/translate/impl/common_funcs.h @@ -110,6 +113,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp frontend/maxwell/translate/impl/load_constant.cpp + frontend/maxwell/translate/impl/load_constant.h frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_local_shared.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 204c5f9e0..02648d769 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -26,6 +26,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); @@ -35,6 +36,8 @@ void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 52dcef8a4..4a267b16c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,8 +6,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { struct AttrInfo { @@ -74,6 +72,14 @@ void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + static Id GetCbuf(EmitContext& ctx, Id result_type, Id 
UniformDefinitions::*member_ptr, u32 element_size, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6b81f0169..335603f88 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitUnreachable(EmitContext& ctx) { + ctx.OpUnreachable(); +} + void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { ctx.OpDemoteToHelperInvocationEXT(); ctx.OpBranch(continue_label); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 9415d02f6..1c50ae51e 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -15,6 +15,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9b898e4e1..552472487 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -87,6 +87,10 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::Unreachable() { + Inst(Opcode::Unreachable); +} + void IREmitter::DemoteToHelperInvocation(Block* continue_label) { block->SetBranch(continue_label); continue_label->AddImmediatePredecessor(block); @@ -126,6 +130,14 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) { Inst(Opcode::SetGotoVariable, id, value); } +U32 IREmitter::GetIndirectBranchVariable() { + return Inst(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 269f367a4..17bc32fc8 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -37,6 +37,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void Unreachable(); void DemoteToHelperInvocation(Block* continue_label); void Prologue(); @@ -51,6 +52,9 @@ public: [[nodiscard]] U1 GetGotoVariable(u32 id); void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 52a5e5034..c3ba6b522 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,6 +55,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case 
Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9b050995b..fb79e3d8d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) // Special operations @@ -26,6 +27,8 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 4f6707fae..1e9b8e426 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -14,6 +14,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { @@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Opcode opcode{Decode(inst.raw)}; switch (opcode) { case Opcode::BRA: - case Opcode::BRX: case Opcode::JMP: - case Opcode::JMX: case Opcode::RET: if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; @@ -264,10 +263,6 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::JMP: AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); break; - case Opcode::BRX: - case Opcode::JMX: - AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); - break; case Opcode::RET: block->end_class = EndClass::Return; break; @@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::SSY: block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; + case Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); case Opcode::PRET: @@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { - throw NotImplementedException("{}", is_absolute ? 
"JMX" : "BRX"); +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, block->begin); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += brx_table->branch_offset; + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back(branch); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; } CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, @@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function // Block already exists and it has been visited return &*it; } - // TODO: FIX DANGLING BLOCKS Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, @@ -494,6 +529,11 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::IndirectBranch: + for (Block* const branch : block.indirect_branches) { + add_branch(branch, false); + } + break; case EndClass::Call: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 22f134194..1e05fcb97 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -26,6 +26,7 @@ using FunctionId = size_t; enum class EndClass { Branch, + IndirectBranch, Call, Exit, Return, @@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook< union { Block* branch_true; FunctionId function_call; + IR::Reg branch_reg; }; union { Block* branch_false; Block* return_block; + s32 branch_offset; }; + std::vector indirect_branches; }; struct Label { @@ -139,7 +143,8 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp 
b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..96453509d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template <typename Callable> +std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast<u32>(ldc.index)}; + const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index{cbuf_index}, + 
.cbuf_offset{cbuf_offset}, + .num_entries{imnmx_immediate + 1}, + .branch_offset{brx_offset}, + .branch_reg{brx_reg}, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h index 57fd531f2..743d68d61 100644 --- a/src/shader_recompiler/frontend/maxwell/instruction.h +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 9d4688390..a6e55f61e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" @@ -46,12 +47,15 @@ enum class StatementType { Break, Return, Kill, + Unreachable, Function, Identity, Not, Or, SetVariable, + SetIndirectBranchVariable, Variable, + IndirectBranchCond, }; bool HasChildren(StatementType type) { @@ -72,12 +76,15 @@ struct Loop {}; struct Break {}; struct Return {}; struct Kill {}; +struct Unreachable {}; struct FunctionTag {}; struct Identity {}; struct Not {}; struct Or {}; struct SetVariable {}; +struct SetIndirectBranchVariable {}; struct Variable {}; +struct IndirectBranchCond {}; #ifdef _MSC_VER #pragma warning(push) @@ -96,6 +103,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(Kill) : type{StatementType::Kill} {} + Statement(Unreachable) : type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -103,7 +111,12 @@ struct Statement : ListBaseHook { : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} 
Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_) + : location{location_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -118,11 +131,14 @@ struct Statement : ListBaseHook { IR::Condition guest_cond; Statement* op; Statement* op_a; + u32 location; + s32 branch_offset; }; union { Statement* cond; Statement* op_b; u32 id; + IR::Reg branch_reg; }; Statement* up{}; StatementType type; @@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) { return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); case StatementType::Variable: return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); default: return ""; } @@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Kill: ret += fmt::format("{} kill;\n", indent); break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; case StatementType::Function: case StatementType::Identity: case StatementType::Not: case StatementType::Or: case StatementType::Variable: + case StatementType::IndirectBranchCond: throw LogicError("Statement can't be printed"); } } @@ -417,6 +443,17 @@ private: } break; } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset)); + for (Flow::Block* const branch : block.indirect_branches) { + const Node indirect_label{local_labels.at(branch)}; + Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{})); + break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; const Node call_return_label{local_labels.at(block.return_block)}; @@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) { return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); default: throw NotImplementedException("Statement type {}", stmt.type); } @@ -670,6 +709,15 @@ private: ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } + case StatementType::SetIndirectBranchVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } case StatementType::If: { if (!current_block) { current_block = block_pool.Create(inst_pool); @@ 
-756,6 +804,15 @@ private: current_block = demote_block; break; } + case StatementType::Unreachable: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Unreachable(); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 39becf93c..49ccb7d62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -5,25 +5,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" namespace Shader::Maxwell { +using namespace LDC; namespace { -enum class Mode : u64 { - Default, - IL, - IS, - ISL, -}; - -enum class Size : u64 { - U8, - S8, - U16, - S16, - B32, - B64, -}; - std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, const IR::U32& reg, const IR::U32& imm) { switch (mode) { @@ -37,16 +23,7 @@ std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im } // Anonymous namespace void TranslatorVisitor::LDC(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_reg; - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - BitField<44, 2, Mode> mode; - BitField<48, 3, Size> size; - } const ldc{insn}; - + const Encoding ldc{insn}; const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; const IR::U32 reg{X(ldc.src_reg)}; const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b62d8ee2a..a0057a473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) { ThrowNotImplemented(Opcode::BRK); } -void TranslatorVisitor::BRX(u64) { - ThrowNotImplemented(Opcode::BRX); -} - void TranslatorVisitor::CAL() { // CAL is a no-op } @@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) { ThrowNotImplemented(Opcode::JMP); } -void TranslatorVisitor::JMX(u64) { - ThrowNotImplemented(Opcode::JMX); -} - void TranslatorVisitor::KIL() { // KIL is a no-op } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bab7ca186..259233746 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,8 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, - OverflowFlagTag, GotoVariable>; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map<const IR::Block*, IR::Value, std::less<const IR::Block*>>; struct DefTable { @@ -65,6 +69,10 @@ struct DefTable { return goto_vars[goto_variable.index]; } + [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { + return indirect_branch_var; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -84,6 +92,7 @@ struct DefTable { std::array<ValueMap, IR::NUM_USER_REGS> regs; std::array<ValueMap, IR::NUM_USER_PREDS> preds; boost::container::flat_map<u32, ValueMap> goto_vars; + ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -102,6 +111,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { return inst.Opcode() == IR::Opcode::Phi; } @@ -219,6 +232,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -244,6 +260,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; 
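The vk_pipeline_cache.cpp changes below close the loop for the disk cache: every constant-buffer word the recompiler reads during BRX analysis is recorded under a 64-bit key packing (cbuf_index, cbuf_offset), serialized next to the texture types, and replayed by the file-backed environment's ReadCbufValue when a shader is rebuilt without a live GPU context. A compilable C++ sketch of that record-and-replay scheme follows; it borrows the MakeCbufKey name from the patch, but the class and its layout are a simplification, not the patch's exact format.

// cbuf_replay_sketch.cpp -- illustrative only; Serialize()/Deserialize() in
// the patch stream these pairs to disk rather than keeping them in memory.
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <unordered_map>

// Pack (index, offset) into one map key, as MakeCbufKey does in the patch.
constexpr uint64_t MakeCbufKey(uint32_t index, uint32_t offset) {
    return (static_cast<uint64_t>(index) << 32) | offset;
}

class CbufCache {
public:
    // Record a value observed while translating with a live GPU context.
    void Record(uint32_t index, uint32_t offset, uint32_t value) {
        values.emplace(MakeCbufKey(index, offset), value);
    }

    // Replay a read when rebuilding from the disk cache: every value the
    // analysis needs must have been recorded during the original build.
    uint32_t Read(uint32_t index, uint32_t offset) const {
        const auto it{values.find(MakeCbufKey(index, offset))};
        if (it == values.end()) {
            throw std::runtime_error("uncached constant buffer read");
        }
        return it->second;
    }

private:
    std::unordered_map<uint64_t, uint32_t> values;
};

int main() {
    CbufCache cache;
    cache.Record(1, 0x40, 0xdeadbeef); // captured during first compilation
    std::printf("replayed: 0x%x\n", cache.Read(1, 0x40));
}

Throwing on a miss mirrors the patch's FileEnvironment, which raises a LogicError when a cached shader asks for a cbuf value that was never serialized, since such an entry cannot be faithfully reconstructed.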
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8b2816c13..6cde01491 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { } u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | static_cast(offset); + return (static_cast(index) << 32) | offset; } class GenericEnvironment : public Shader::Environment { @@ -114,11 +114,13 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) @@ -130,6 +132,10 @@ public: file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -212,6 +218,7 @@ protected: std::vector code; std::unordered_map texture_types; + std::unordered_map cbuf_values; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -267,6 +274,17 @@ public: ~GraphicsEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{maxwell3d->regs}; const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; @@ -312,6 +330,18 @@ public: ~ComputeEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; @@ -386,8 +416,10 @@ public: void Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_cbuf_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) 
.read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) @@ -403,6 +435,13 @@ public: .read(reinterpret_cast(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -418,6 +457,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; if (it == texture_types.end()) { @@ -445,6 +492,7 @@ public: private: std::unique_ptr code; std::unordered_map texture_types; + std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{}; From 6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 22:23:45 -0400 Subject: [PATCH 127/770] shader: Implement FSWZADD --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 8 ++++ .../backend/spirv/emit_context.h | 2 + .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_warp.cpp | 16 +++++++ .../frontend/ir/ir_emitter.cpp | 3 ++ .../frontend/ir/ir_emitter.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../impl/floating_point_swizzled_add.cpp | 44 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 4 ++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../translate/impl/not_implemented.cpp | 4 -- .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/shader_info.h | 1 + 14 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 44ab929b7..5ce420cbf 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/half_floating_point_helper.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 96d0e9b4d..7531f8b21 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) { subgroup_local_invocation_id = 
DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); } + if (info.uses_fswzadd) { + const Id f32_one{Constant(F32[1], 1.0f)}; + const Id f32_minus_one{Constant(F32[1], -1.0f)}; + const Id f32_zero{Constant(F32[1], 0.0f)}; + fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); + fswzadd_lut_b = + ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 1a4e8221a..ffac39c4f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,6 +103,8 @@ public: Id vertex_index{}; Id base_vertex{}; Id front_face{}; + Id fswzadd_lut_a{}; + Id fswzadd_lut_b{}; Id local_memory{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 02648d769..3d0c6f7ba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam Id segmentation_mask); Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 44d8a347f..cbc5b1c96 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id return SelectValue(ctx, in_range, value, src_thread_id); } +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { + const Id three{ctx.Constant(ctx.U32[1], 3)}; + Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1)); + mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + + const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; + const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; + + const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; + const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; + return ctx.OpFAdd(ctx.F32[1], result_a, result_b); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 552472487..505fba46a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons const IR::U32& seg_mask) { return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); } +F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { + return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); +} } // namespace Shader::IR diff 
--git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 17bc32fc8..8f3325738 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -277,6 +277,8 @@ public: const IR::U32& seg_mask); [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask); + [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, + FpControl control = {}); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fb79e3d8d..717aa71ca 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -408,3 +408,4 @@ OPCODE(ShuffleIndex, U32, U32, OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) +OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..e42921a21 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{CastFpRounding(fswzadd.round)}, + .fmz_mode{fswzadd.ftz != 0 ?
IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 9bae89c10..30b570ce4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast<IR::F32>(GetReg8(insn)); +} + IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast<IR::F32>(GetReg20(insn)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 54c31deb4..bf7d1bae8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -353,6 +353,7 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0057a473..6a580f831 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSWZADD(u64) { - ThrowNotImplemented(Opcode::FSWZADD); -} - void TranslatorVisitor::GETCRSPTR(u64) { ThrowNotImplemented(Opcode::GETCRSPTR); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c932c307b..81090335f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SubgroupBallot: info.uses_subgroup_vote = true; break; + case IR::Opcode::FSwizzleAdd: + info.uses_fswzadd = true; + break; default: break; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9111159f3..4b4006b7f 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -94,6 +94,7 @@ struct Info { bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; bool uses_subgroup_vote{}; + bool uses_fswzadd{}; IR::Type used_constant_buffer_types{}; From 12783f8105e06c8aebcfccf29921441552b991f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 16:58:42 -0300 Subject: [PATCH 128/770] shader: Add missing new lines --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 505fba46a..6e7dddead 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++
b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1602,7 +1602,9 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons const IR::U32& seg_mask) { return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); } + F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); } + } // namespace Shader::IR From 55b960a20f180a7529e15082023cb181d307110e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 22:12:52 -0300 Subject: [PATCH 129/770] spirv: Fix default output attribute initialization --- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 70ae7b51e..44d2fde02 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -10,11 +10,11 @@ void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB) { const Id zero{ctx.Constant(ctx.F32[1], 0.0f)}; const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; - const Id null_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, zero)}; - ctx.OpStore(ctx.output_position, ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)); + const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; + ctx.OpStore(ctx.output_position, default_vector); for (const Id generic_id : ctx.output_generics) { if (Sirit::ValidId(generic_id)) { - ctx.OpStore(generic_id, null_vector); + ctx.OpStore(generic_id, default_vector); } } } From b0d5572abfe1f14e02d8219f0a4d7dd09ff36fd1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 22:13:37 -0300 Subject: [PATCH 130/770] shader: Fix indirect branches to scheduler instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 ++++++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 9 ++++++++- .../frontend/maxwell/structured_control_flow.cpp | 6 +++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1e9b8e426..784f9df8a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -434,7 +434,10 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, block->indirect_branches.reserve(targets.size()); for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; - block->indirect_branches.push_back(branch); + block->indirect_branches.push_back({ + .block{branch}, + .address{target}, + }); } block->cond = IR::Condition{true}; block->end = pc + 1; @@ -530,8 +533,8 @@ std::string CFG::Dot() const { } break; case EndClass::IndirectBranch: - for (Block* const branch : block.indirect_branches) { - add_branch(branch, false); + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); } break; case EndClass::Call: diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 1e05fcb97..a8c90d27a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -22,6 +22,8 @@ namespace 
Shader::Maxwell::Flow { +struct Block; + using FunctionId = size_t; enum class EndClass { @@ -60,6 +62,11 @@ private: boost::container::small_vector entries; }; +struct IndirectBranch { + Block* block; + u32 address; +}; + struct Block : boost::intrusive::set_base_hook< // Normal link is ~2.5% faster compared to safe link boost::intrusive::link_mode<boost::intrusive::normal_link>> { @@ -84,7 +91,7 @@ struct Block : boost::intrusive::set_base_hook< Block* return_block; s32 branch_offset; }; - std::vector<Block*> indirect_branches; + std::vector<IndirectBranch> indirect_branches; }; struct Label { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index a6e55f61e..c804c2a8e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -446,9 +446,9 @@ private: case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, block.branch_offset)); - for (Flow::Block* const branch : block.indirect_branches) { - const Node indirect_label{local_labels.at(branch)}; - Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } From 514a6b07eedace58b4a0c95282bdfc729623d1d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:19:50 -0300 Subject: [PATCH 131/770] shader: Store type of phi nodes in flags This is needed because pseudo-instructions were invalidated.
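Concretely: once pseudo-instructions are invalidated, a phi can no longer derive its type from Arg(0), so the type is latched into the instruction's spare flags storage when the first operand is attached and read back from there. A sketch of the intent (simplified and illustrative only; the real accessors appear in the diff below):

// Sketch only: latch a phi's type on the first operand, then query it without
// touching the (possibly invalidated) argument list.
void AddPhiOperandSketch(IR::Inst& phi, IR::Block* pred, const IR::Value& value) {
    if (phi.Flags<IR::Type>() == IR::Type::Void) {
        phi.SetFlags(value.Type()); // the first operand fixes the phi's type
    }
    // ... append (pred, value) exactly as Inst::AddPhiOperand does below ...
}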
--- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 ++- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++ src/shader_recompiler/frontend/ir/value.cpp | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2e7e6bb0c..6389d80bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -288,7 +288,8 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { operands.push_back(PhiArgDef(ctx, inst, index)); operands.push_back(inst->PhiBlock(index)->Definition<Id>()); } - const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; + // The type of a phi instruction is stored in its flags + const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index c3ba6b522..074c71d53 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -193,6 +193,10 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } + if (Flags<IR::Type>() == IR::Type::Void) { + // Set the type of the phi node + SetFlags(value.Type()); + } phi_args.emplace_back(predecessor, value); } diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..837c1b487 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -56,7 +56,11 @@ bool Value::IsLabel() const noexcept { } IR::Type Value::Type() const noexcept { - if (IsIdentity() || IsPhi()) { + if (IsPhi()) { + // The type of a phi node is stored in its flags + return inst->Flags<IR::Type>(); + } + if (IsIdentity()) { return inst->Arg(0).Type(); } if (type == Type::Opaque) { From b7589fe1154d9e810e83f6c609dad1d646ec0359 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 26 Mar 2021 18:52:06 -0400 Subject: [PATCH 132/770] shader: Add PointSize attribute --- src/shader_recompiler/backend/spirv/emit_context.cpp | 6 ++++++ src/shader_recompiler/backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 2 ++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/shader_info.h | 1 + 5 files changed, 13 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7531f8b21..ecee1220e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -492,6 +492,12 @@ void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); } + if (info.stores_point_size) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing PointSize in Fragment stage"); + } + output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); + } for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (info.stores_generics[i]) { output_generics[i] = DefineOutput(*this, F32[4]); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index ffac39c4f..97e055db4 100644 ---
a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -120,6 +120,7 @@ public: Id input_position{}; std::array<Id, 32> input_generics{}; + Id output_point_size{}; Id output_position{}; std::array<Id, 32> output_generics{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4a267b16c..c870fac47 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -37,6 +37,8 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); } switch (attr) { + case IR::Attribute::PointSize: + return ctx.output_point_size; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 81090335f..a47d54b9c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -58,6 +58,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { return; } switch (attribute) { + case IR::Attribute::PointSize: + info.stores_point_size = true; + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4b4006b7f..3d8e08909 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -79,6 +79,7 @@ struct Info { bool stores_frag_depth{}; std::array<bool, 32> stores_generics{}; bool stores_position{}; + bool stores_point_size{}; bool uses_fp16{}; bool uses_fp64{}; From 9d7422d967a97fea7888449652ad93da88e92b54 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 20:05:38 +0200 Subject: [PATCH 133/770] shader: Add PointCoord attribute --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 +++ src/shader_recompiler/backend/spirv/emit_context.h | 2 ++ .../backend/spirv/emit_spirv_context_get_set.cpp | 6 ++++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 ++++ src/shader_recompiler/shader_info.h | 1 + 5 files changed, 16 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ecee1220e..2c93bada5 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -425,6 +425,9 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_front_face) { front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); } + if (info.loads_point_coord) { + point_coord = DefineInput(*this, F32[2], spv::BuiltIn::PointCoord); + } for (size_t index = 0; index < info.input_generics.size(); ++index) { const InputVarying generic{info.input_generics[index]}; if (!generic.used) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 97e055db4..071e66c2a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,6 +103,8 @@ public: Id vertex_index{}; Id base_vertex{}; Id front_face{}; + Id point_coord{}; + Id fswzadd_lut_a{}; Id fswzadd_lut_b{}; diff --git
a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index c870fac47..d02761f32 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -179,6 +179,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()), ctx.u32_zero_value); + case IR::Attribute::PointSpriteS: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, + ctx.Constant(ctx.U32[1], 0U))); + case IR::Attribute::PointSpriteT: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a47d54b9c..eb3d1343f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -47,6 +47,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::FrontFace: info.loads_front_face = true; break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + info.loads_point_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3d8e08909..c9f6d9ef7 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -74,6 +74,7 @@ struct Info { bool loads_instance_id{}; bool loads_vertex_id{}; bool loads_front_face{}; + bool loads_point_coord{}; std::array<bool, 8> stores_frag_color{}; bool stores_frag_depth{}; From 7a1c14269e20cffeed780f388c90a86f8bba1a92 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:58:46 -0300 Subject: [PATCH 134/770] spirv: Add fixed pipeline point size --- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 4 ++++ src/shader_recompiler/profile.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2c93bada5..5cd505d99 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -495,7 +495,7 @@ void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); } - if (info.stores_point_size) { + if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in Fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 44d2fde02..5f80c189f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -17,6 +17,10 @@ void EmitPrologue(EmitContext& ctx) { ctx.OpStore(generic_id, default_vector); } } + if (ctx.profile.fixed_state_point_size) { + const float
point_size{*ctx.profile.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); + } } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 0276fc23b..f4b94896c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <optional> #include "common/common_types.h" @@ -41,6 +42,8 @@ struct Profile { std::array<AttributeType, 32> generic_input_types{}; bool convert_depth_mode{}; + + std::optional<float> fixed_state_point_size; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6cde01491..eb4df9000 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -864,6 +864,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; if (stage == Shader::Stage::VertexB) { profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = Common::BitCast<float>(key.state.point_size); + } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); } From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: [PATCH 135/770] shader: Implement TLD --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv_image.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- .../frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_load.cpp | 165 ++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 1 + src/video_core/memory_manager.cpp | 3 +- 8 files changed, 174 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 5ce420cbf..4b4c43ba8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_load.cpp frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/video_helper.cpp frontend/maxwell/translate/impl/video_helper.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 3ea0011aa..310cc7af7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -254,7 +254,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, index), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc
b/src/shader_recompiler/frontend/ir/opcodes.inc index 717aa71ca..302b8471d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -387,7 +387,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -396,7 +396,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index d668dc1aa..b47fb9c2e 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -252,8 +252,8 @@ INST(SYNC, "SYNC", "1111 0000 1111 1---") INST(TEX, "TEX", "1100 0--- ---- ----") INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") -INST(TLD, "TLD", "1101 1100 --11 1---") -INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") INST(TLD4, "TLD4", "1100 10-- ---- ----") INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6a580f831..60d61ec6e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLD(u64) { - ThrowNotImplemented(Opcode::TLD); -} - -void TranslatorVisitor::TLD_b(u64) { - ThrowNotImplemented(Opcode::TLD_b); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..b4063fa6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed 
under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + 
BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implemented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type, false)); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); + }()}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index eb3d1343f..3b00d7c8c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -382,6 +382,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d2b9d5f2b..05e27c687 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,12 +64,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - ASSERT(it->first == gpu_addr); + // ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& map : submapped_ranges) { From 2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 01:45:20 +0100 Subject: [PATCH 136/770] shader: Implement TLDS --- src/shader_recompiler/CMakeLists.txt | 1 + .../translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_load_swizzled.cpp | 252 ++++++++++++++++++ 3 files changed, 253 insertions(+), 4 deletions(-) create mode 100644
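Stepping back to the TLD translator just above: every optional operand lives in the meta_reg bank and is consumed in a fixed order. A hypothetical helper (not in the patch) summarizing how many registers a given combination of modifiers consumes, mirroring the order Impl reads them:

#include <cstdint>

// Illustrative only: registers consumed from meta_reg by TLD, in the order
// Impl reads them: bindless handle, explicit LOD, packed AOFFI offsets,
// multisample index. MakeOffset advances the register cursor itself.
std::uint32_t TldMetaRegCount(bool bindless, bool lod, bool aoffi, bool multisample) {
    std::uint32_t count{0};
    if (bindless) { ++count; }    // texture handle comes first
    if (lod) { ++count; }         // explicit level of detail
    if (aoffi) { ++count; }       // one register of packed 4-bit offsets
    if (multisample) { ++count; } // multisample index
    return count;
}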
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 4b4c43ba8..d3afd7d31 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_load_swizzled.cpp frontend/maxwell/translate/impl/texture_load.cpp frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/video_helper.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 60d61ec6e..7e1ad63e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,10 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLDS(u64) { - ThrowNotImplemented(Opcode::TLDS); -} - void TranslatorVisitor::TMML(u64) { ThrowNotImplemented(Opcode::TMML); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..3e6ebd911 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
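A note before the body of the new file: Sample() below switches on a 4-bit encoding field (bits 53-56 of the instruction) to choose the operand layout, rather than testing the lod/aoffi/ms bits individually. The cases it accepts, summarized from that switch as a descriptive comment (not code from the patch):

// TLDS operand layouts by encoding value (see Sample() below):
//   0: 1D                coords = Ra
//   1: 1D + LOD          coords = Ra,         lod = Rb
//   2: 2D                coords = (Ra, Rb)
//   4: 2D + AOFFI        coords = (Ra, Ra+1), packed offsets in Rb
//   5: 2D + LOD          coords = (Ra, Ra+1), lod = Rb
//   6: 2D + MS           coords = (Ra, Ra+1), multisample index = Rb
//   7: 3D                coords = (Ra, Ra+1, Rb)
//   8: 2D array          array index in Ra<15:0>, coords = (Rb, Rb+1)
//  12: 2D + LOD + AOFFI  coords = (Ra, Ra+1), lod = Rb, offsets in Rb+1
// Anything else throws NotImplementedException.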
+ +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type; + switch (tlds.encoding) { + case 0: { + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + break; + } + case 1: { + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + } + case 2: { + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + } + case 4: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + } + case 5: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + } + case 6: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + } + case 7: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + } + case 8: { + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::ColorArray2D; + IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: { + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + } + default: { + throw NotImplementedException("Illegal 
encoding {}", tlds.encoding.Value()); + break; + } + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array<IR::F32, 4> swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell From 613b48c4a2ce71a0d0eaba17fe164f4a2e4a3db5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 19:47:52 +0200 Subject: [PATCH 137/770] shader,spirv: Implement ImageQueryLod.
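One detail worth noting up front: OpImageQueryLod yields a two-component result (with GLSL textureQueryLod semantics, x being the level that would be accessed and y the computed relative LOD), while the IR opcode added here is declared F32x4, so the backend zero-fills the upper lanes. A sketch of the resulting layout, illustrative only and under that assumption:

#include <array>

// Shape of the value EmitImageQueryLod constructs: the queried vec2 widened
// to the F32x4 the IR declares, with zeros in the remaining components.
std::array<float, 4> QueryLodResultModel(float accessed_level, float computed_lod) {
    return {accessed_level, computed_lod, 0.0f, 0.0f};
}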
--- .../backend/spirv/emit_context.cpp | 1 + .../backend/spirv/emit_context.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 3 +++ .../backend/spirv/emit_spirv_image.cpp | 15 +++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 3 +++ .../ir_opt/collect_shader_info_pass.cpp | 3 ++- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 +++++ 9 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 5cd505d99..c8ce58254 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -182,6 +182,7 @@ void EmitContext::DefineCommonConstants() { true_value = ConstantTrue(U1); false_value = ConstantFalse(U1); u32_zero_value = Constant(U32[1], 0U); + f32_zero_value = Constant(F32[1], 0.0f); } void EmitContext::DefineInterfaces(const Info& info) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 071e66c2a..3965869f0 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -70,6 +70,7 @@ public: Id true_value{}; Id false_value{}; Id u32_zero_value{}; + Id f32_zero_value{}; UniformDefinitions uniform_types; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 3d0c6f7ba..105c23745 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -362,6 +362,7 @@ Id EmitBindlessImageGather(EmitContext&); Id EmitBindlessImageGatherDref(EmitContext&); Id EmitBindlessImageFetch(EmitContext&); Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -370,6 +371,7 @@ Id EmitBoundImageGather(EmitContext&); Id EmitBoundImageGatherDref(EmitContext&); Id EmitBoundImageFetch(EmitContext&); Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -385,6 +387,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 310cc7af7..2cd6b38c4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -161,6 +161,10 @@ Id EmitBindlessImageQueryDimensions(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id 
EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -193,6 +197,10 @@ Id EmitBoundImageQueryDimensions(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -287,4 +295,11 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i throw LogicError("Unspecified image type {}", info.type.Value()); } +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coords) { + const Id zero{ctx.f32_zero_value}; + const Id image{TextureImage(ctx, index)}; + return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], image, coords), + zero, zero); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6e7dddead..ba9591727 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1567,6 +1567,12 @@ Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { return Inst(op, handle, lod); } +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod + : Opcode::BindlessImageQueryLod}; + return Inst(op, handle, coords); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8f3325738..9e752b208 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -255,6 +255,8 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); + [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords); + [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 302b8471d..49cdcd57f 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -380,6 +380,7 @@ OPCODE(BindlessImageGather, F32x4, U32, OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -389,6 +390,7 @@ OPCODE(BoundImageGather, F32x4, U32, OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -398,6 +400,7 @@ OPCODE(ImageGather, F32x4, U32, 
OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 3b00d7c8c..04e3a4f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -383,7 +383,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: - case IR::Opcode::ImageQueryDimensions: { + case IR::Opcode::ImageQueryDimensions: + case IR::Opcode::ImageQueryLod: { const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index dfacf848f..6eb286b83 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -57,6 +57,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BindlessImageQueryDimensions: return IR::Opcode::ImageQueryDimensions; + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BindlessImageQueryLod: + return IR::Opcode::ImageQueryLod; default: return IR::Opcode::Void; } @@ -72,6 +75,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BindlessImageFetch: case IR::Opcode::BindlessImageQueryDimensions: + case IR::Opcode::BindlessImageQueryLod: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -81,6 +85,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGatherDref: case IR::Opcode::BoundImageFetch: case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); From be3e94ae55184933e0f1f5fb55698513f7936382 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 21:25:08 +0200 Subject: [PATCH 138/770] shader: Implement TMML partially Also switch EmitImageQueryLod to the combined image sampler, since SPIR-V's OpImageQueryLod operates on a sampled image rather than a plain image. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv_image.cpp | 4 +- .../frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/ir_emitter.h | 3 +- .../translate/impl/not_implemented.cpp | 8 -- .../translate/impl/texture_mipmap_level.cpp | 130 ++++++++++++++++++ 6 files changed, 137 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d3afd7d31..d9a2b9cb4 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -135,6 +135,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_gather.cpp frontend/maxwell/translate/impl/texture_load_swizzled.cpp frontend/maxwell/translate/impl/texture_load.cpp + frontend/maxwell/translate/impl/texture_mipmap_level.cpp frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/video_helper.cpp diff --git
a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 2cd6b38c4..9f8fe2603 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -297,8 +297,8 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coords) { const Id zero{ctx.f32_zero_value}; - const Id image{TextureImage(ctx, index)}; - return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], image, coords), + const Id sampler{Texture(ctx, index)}; + return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), zero, zero); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ba9591727..f6818ec8a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1567,10 +1567,10 @@ Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { return Inst(op, handle, lod); } -Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords) { +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod : Opcode::BindlessImageQueryLod}; - return Inst(op, handle, coords); + return Inst(op, Flags{info}, handle, coords); } U1 IREmitter::VoteAll(const U1& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 9e752b208..2beeace8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -255,7 +255,8 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); - [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords); + [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, + TextureInstInfo info); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 7e1ad63e1..9f5ea7775 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TMML(u64) { - ThrowNotImplemented(Opcode::TMML); -} - -void TranslatorVisitor::TMML_b(u64) { - ThrowNotImplemented(Opcode::TMML_b); -} - void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..ee13ede30 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,130 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0xC) != 0) { + throw NotImplementedException("TMML BA results are not implemented"); + } + + IR::F32 transform_constant = v.ir.Imm32(256.0f); + + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tmml.type, false)); + const IR::Value sample{ + [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RP}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { +
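// A hedged reading of the write-back loop that follows: each set bit in
// tmml.mask enables one component of the four-wide query result, and the
// destination registers are packed, so dest_reg only advances for enabled
// components. Elements 0 and 1 carry the LOD values that receive the 256.0f
// fixed-point scale; elements 2 and 3 are the BA results rejected earlier.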
if (((tmml.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; + v.F(dest_reg, + element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell From d5bfc630886d98ed77959a9771c67293244aff0e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:00:43 +0200 Subject: [PATCH 139/770] shader: Implement ImageGradient --- .../backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_image.cpp | 51 ++++++++++++++++++- .../frontend/ir/ir_emitter.cpp | 7 +++ .../frontend/ir/ir_emitter.h | 4 ++ src/shader_recompiler/frontend/ir/modifiers.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++ .../ir_opt/collect_shader_info_pass.cpp | 11 +++- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 ++ 8 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 105c23745..9c9e0c5dd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -363,6 +363,7 @@ Id EmitBindlessImageGatherDref(EmitContext&); Id EmitBindlessImageFetch(EmitContext&); Id EmitBindlessImageQueryDimensions(EmitContext&); Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -372,6 +373,7 @@ Id EmitBoundImageGatherDref(EmitContext&); Id EmitBoundImageFetch(EmitContext&); Id EmitBoundImageQueryDimensions(EmitContext&); Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -388,6 +390,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c Id lod, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 9f8fe2603..1eba9cc00 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -69,12 +69,44 @@ public: } } + explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, + Id offset, Id lod_clamp) { + if (Sirit::ValidId(derivates)) { + boost::container::static_vector deriv_x_accum; + boost::container::static_vector deriv_y_accum; + for (size_t i = 0; i < num_derivates; i++) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } 
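// An aside on the loop above (inferred from the code rather than stated in the
// patch): the incoming derivates vector interleaves the two gradient directions
// as (dx0, dy0, dx1, dy1, ...), while the SPIR-V Grad image operand expects one
// vector per direction, for example for a 2D texture:
//   deriv_x = (dx0, dx1) and deriv_y = (dy0, dy1)
// so element 2*i feeds the X accumulator and element 2*i + 1 feeds Y.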
+ Id derivates_X = ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()}); + Id derivates_Y = ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()}); + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); + } else { + throw LogicError("Derivates must be present"); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + void Add(spv::ImageOperandsMask new_mask, Id value) { mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) | static_cast<u32>(new_mask)); operands.push_back(value); } + void Add(spv::ImageOperandsMask new_mask, Id value, Id value_2) { + mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) | + static_cast<u32>(new_mask)); + operands.push_back(value); + operands.push_back(value_2); + } + std::span<const Id> Span() const noexcept { return std::span{operands.data(), operands.size()}; } @@ -84,7 +116,7 @@ public: } private: - boost::container::static_vector<Id, 3> operands; + boost::container::static_vector<Id, 4> operands; spv::ImageOperandsMask mask{}; }; @@ -165,6 +197,10 @@ Id EmitBindlessImageQueryLod(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -201,6 +237,10 @@ Id EmitBoundImageQueryLod(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; @@ -302,4 +342,13 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coo zero, zero); } +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp) { + const auto info{inst->Flags<IR::TextureInstInfo>()}; + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), + coords, operands.Mask(), operands.Span()); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f6818ec8a..edf8c05d4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1573,6 +1573,13 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture return Inst(op, Flags{info}, handle, coords); } +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, + const Value& offset, const F32& lod_clamp, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ?
Opcode::BoundImageGradient + : Opcode::BindlessImageGradient}; + return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2beeace8f..a4616e247 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -268,6 +268,10 @@ public: [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, + const Value& derivates, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..90078f535 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -39,6 +39,7 @@ union TextureInstInfo { BitField<9, 1, u32> has_lod_clamp; BitField<10, 1, u32> relaxed_precision; BitField<11, 2, u32> gather_component; + BitField<13, 2, u32> num_derivates; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 49cdcd57f..79baacd08 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -381,6 +381,7 @@ OPCODE(BindlessImageGatherDref, F32x4, U32, OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -391,6 +392,7 @@ OPCODE(BoundImageGatherDref, F32x4, U32, OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -401,6 +403,7 @@ OPCODE(ImageGatherDref, F32x4, U32, OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 04e3a4f53..730d3e91e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -370,12 +370,20 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BindlessImageSampleDrefExplicitLod: case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: + case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: + case 
IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: case IR::Opcode::BoundImageSampleDrefImplicitLod: case IR::Opcode::BoundImageSampleDrefExplicitLod: case IR::Opcode::BoundImageGather: case IR::Opcode::BoundImageGatherDref: + case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: case IR::Opcode::ImageSampleImplicitLod: case IR::Opcode::ImageSampleExplicitLod: case IR::Opcode::ImageSampleDrefImplicitLod: @@ -384,7 +392,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: - case IR::Opcode::ImageQueryLod: { + case IR::Opcode::ImageQueryLod: + case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 6eb286b83..da8977b76 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -60,6 +60,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BindlessImageQueryLod: return IR::Opcode::ImageQueryLod; + case IR::Opcode::BoundImageGradient: + case IR::Opcode::BindlessImageGradient: + return IR::Opcode::ImageGradient; default: return IR::Opcode::Void; } @@ -76,6 +79,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageFetch: case IR::Opcode::BindlessImageQueryDimensions: case IR::Opcode::BindlessImageQueryLod: + case IR::Opcode::BindlessImageGradient: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -86,6 +90,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageFetch: case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: + case IR::Opcode::BoundImageGradient: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); From 630273b6295f524401abf1c131dba09fdd055911 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:52:52 +0200 Subject: [PATCH 140/770] shader: Implement TXD --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/not_implemented.cpp | 8 - .../translate/impl/texture_gradient.cpp | 180 ++++++++++++++++++ 4 files changed, 183 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d9a2b9cb4..03a5793aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_gradient.cpp frontend/maxwell/translate/impl/texture_load_swizzled.cpp frontend/maxwell/translate/impl/texture_load.cpp frontend/maxwell/translate/impl/texture_mipmap_level.cpp diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc 
b/src/shader_recompiler/frontend/maxwell/maxwell.inc index b47fb9c2e..c759bd4d4 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -261,8 +261,8 @@ INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") INST(TXA, "TXA", "1101 1111 0100 0---") -INST(TXD, "TXD", "1101 1110 0011 10--") -INST(TXD_b, "TXD (b)", "1101 1110 0111 10--") +INST(TXD, "TXD", "1101 1110 00-- ----") +INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") INST(TXQ, "TXQ", "1101 1111 0100 1---") INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9f5ea7775..ba526817a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -317,14 +317,6 @@ void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } -void TranslatorVisitor::TXD(u64) { - ThrowNotImplemented(Opcode::TXD); -} - -void TranslatorVisitor::TXD_b(u64) { - ThrowNotImplemented(Opcode::TXD_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..00768e167 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base = has_lod_clamp ? 
12 : 16; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates; + IR::Reg base_reg = txd.coord_reg; + IR::Reg last_reg; + IR::Value handle; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); + } else { + handle = v.X(base_reg++); + } + + const auto read_array{[&]() -> IR::F32 { + return v.ir.ConvertUToF(32, 16, + v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), + v.ir.Imm32(has_lod_clamp ? 12 : 16))); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast<f32>(0x3b800000U)); + const IR::F32 tmp = v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); + lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type, false)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ?
1 : 0); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); + }()}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell From cb6fc03e55e9eff0826173a6bcacef3034322f7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 01:08:25 -0300 Subject: [PATCH 141/770] shader: Always pass a lod for TexelFetch --- src/shader_recompiler/frontend/ir/opcodes.inc | 6 ++-- .../maxwell/translate/impl/texture_load.cpp | 2 ++ .../translate/impl/texture_load_swizzled.cpp | 34 +++++++------------ 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 79baacd08..e82db0cd2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) @@ -389,7 +389,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) @@ -400,7 +400,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index b4063fa6e..df38f87a3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -124,6 +124,8 @@ void Impl(TranslatorVisitor& 
v, u64 insn, bool is_bindless) { } if (tld.lod != 0) { lod = v.X(meta_reg++); + } else { + lod = v.ir.Imm32(0U); } if (tld.aoffi != 0) { offset = MakeOffset(v, meta_reg, tld.type); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 3e6ebd911..623b8fc23 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -74,62 +74,55 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { const IR::Reg reg_a{tlds.src_reg_a}; const IR::Reg reg_b{tlds.src_reg_b}; IR::Value coords; - IR::U32 lod; + IR::U32 lod{v.ir.Imm32(0U)}; IR::Value offsets; IR::U32 multisample; - Shader::TextureType texture_type; + Shader::TextureType texture_type{}; switch (tlds.encoding) { - case 0: { + case 0: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); break; - } - case 1: { + case 1: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); lod = v.X(reg_b); break; - } - case 2: { + case 2: texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); break; - } - case 4: { + case 4: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); offsets = MakeOffset(v, reg_b); break; - } - case 5: { + case 5: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); lod = v.X(reg_b); break; - } - case 6: { + case 6: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); multisample = v.X(reg_b); break; - } - case 7: { + case 7: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color3D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); break; - } case 8: { CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; texture_type = Shader::TextureType::ColorArray2D; - IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); break; } - case 12: { + case 12: CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); texture_type = Shader::TextureType::Color2D; @@ -137,11 +130,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { lod = v.X(reg_b); offsets = MakeOffset(v, reg_b + 1); break; - } - default: { + default: throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); - break; - } } IR::TextureInstInfo info{}; if (tlds.precision == Precision::F16) { From 4d0d29fc2092bf02e102b8bac9cfa1b509274901 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 08:41:21 +0200 Subject: [PATCH 142/770] shader: Address feedback --- .../backend/spirv/emit_spirv_image.cpp | 30 ++++++++-------- .../translate/impl/texture_gradient.cpp | 34 ++++++++++--------- .../maxwell/translate/impl/texture_load.cpp | 10 +++--- .../translate/impl/texture_mipmap_level.cpp | 26 ++++++-------- src/video_core/memory_manager.cpp | 2 +- 5 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1eba9cc00..03d2ec73e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -72,20 +72,19 @@ public: explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, Id offset, Id lod_clamp) { if (Sirit::ValidId(derivates)) { - boost::container::static_vector deriv_x_accum; - boost::container::static_vector deriv_y_accum; - for (size_t i = 0; i < num_derivates; i++) { - deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); - deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); - } - Id derivates_X = ctx.OpCompositeConstruct( - ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()}); - Id derivates_Y = ctx.OpCompositeConstruct( - ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()}); - Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); - } else { throw LogicError("Derivates must be present"); } + boost::container::static_vector deriv_x_accum; + boost::container::static_vector deriv_y_accum; + for (size_t i = 0; i < num_derivates; i++) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } + const Id derivates_X{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + const Id derivates_Y{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); if (Sirit::ValidId(offset)) { Add(spv::ImageOperandsMask::Offset, offset); } @@ -100,10 +99,10 @@ public: operands.push_back(value); } - void Add(spv::ImageOperandsMask new_mask, Id value, Id value_2) { + void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { mask = static_cast(static_cast(mask) | static_cast(new_mask)); - operands.push_back(value); + operands.push_back(value_1); operands.push_back(value_2); } @@ -345,7 +344,8 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coo Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, + offset, lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, operands.Mask(), operands.Span()); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index 00768e167..c66468a48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -47,7 +47,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { const IR::U32 value{v.X(reg)}; - const u32 base = has_lod_clamp ? 12 : 16; + const u32 base{has_lod_clamp ? 
12U : 16U}; return v.ir.CompositeConstruct( v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); @@ -74,20 +74,21 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::Value coords; - u32 num_derivates; - IR::Reg base_reg = txd.coord_reg; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; IR::Reg last_reg; IR::Value handle; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); } const auto read_array{[&]() -> IR::F32 { - return v.ir.ConvertUToF(32, 16, - v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), - v.ir.Imm32(has_lod_clamp ? 12 : 16))); + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); }}; switch (txd.type) { case TextureType::_1D: { @@ -141,19 +142,20 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::F32 lod_clamp; if (has_lod_clamp) { - const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast<f32>(0x3b800000U)); - const IR::F32 tmp = v.ir.ConvertUToF( - 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); - lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + // Lod clamp is a 4.8 fixed-point value; we need to transform it to float. + // To convert a fixed-point value, compute float(value) / float(1 << fixed_point_bits); + // here fixed_point_bits is 8, so multiply by the reciprocal of 256. + const IR::F32 conv4_8fixp_f{v.ir.Imm32(1.0f / static_cast<f32>(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); } IR::TextureInstInfo info{}; info.type.Assign(GetType(txd.type, false)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ?
1 : 0); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); - }()}; + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; IR::Reg dest_reg{txd.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index df38f87a3..987b7ec34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -117,10 +117,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::Value offset; IR::U32 lod; IR::U32 multisample; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); } if (tld.lod != 0) { lod = v.X(meta_reg++); @@ -138,9 +138,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; info.type.Assign(GetType(tld.type, false)); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); - }()}; + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index ee13ede30..b6efc04f0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -81,39 +81,35 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { BitField<36, 13, u64> cbuf_offset; } const tmml{insn}; - if ((tmml.mask & 0xC) != 0) { + if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implemented"); } - IR::F32 transform_constant = v.ir.Imm32(256.0f); + IR::F32 transform_constant{v.ir.Imm32(256.0f)}; const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; IR::Reg meta_reg{tmml.meta_reg}; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; info.type.Assign(GetType(tmml.type, false)); - const IR::Value sample{ - [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; - const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RP}, - .fmz_mode{IR::FmzMode::FTZ}, - }; IR::Reg dest_reg{tmml.dest_reg}; for (size_t element = 0; element < 4; ++element) { if (((tmml.mask >> element) & 1) == 0) { continue; } - IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; - v.F(dest_reg, - element < 2 ?
IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + value = v.ir.FPMul(value, transform_constant); + } + v.F(dest_reg, value); ++dest_reg; } } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 05e27c687..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,7 +64,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - // ASSERT(it->first == gpu_addr); + ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); From 67afdaf56622d9e4129dfae42abfa743a4b025d4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 19:20:59 +0200 Subject: [PATCH 143/770] shader: Fix TXD --- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 03d2ec73e..bc7de509b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -71,7 +71,7 @@ public: explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, Id offset, Id lod_clamp) { - if (Sirit::ValidId(derivates)) { + if (!Sirit::ValidId(derivates)) { throw LogicError("Derivates must be present"); } boost::container::static_vector deriv_x_accum; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e82db0cd2..ffd0cc690 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -403,7 +403,7 @@ OPCODE(ImageGatherDref, F32x4, U32, OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) // Warp operations OPCODE(VoteAll, U1, U1, ) From 0c4cf3b9eb7de6624a844ae9ac9d2ba6b2ef3ac2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 21:52:06 +0200 Subject: [PATCH 144/770] shader: Implement ClipDistance --- .../backend/spirv/emit_context.cpp | 9 +++++++++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 13 +++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 10 ++++++++++ src/shader_recompiler/shader_info.h | 1 + 5 files changed, 36 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index c8ce58254..2e3e3346d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -176,6 +176,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { AddCapability(spv::Capability::Float64); F64.Define(*this, TypeFloat(64), "f64"); } + if (info.stores_clip_distance) { + Array8F32 = Name(TypeArray(F32[1], Constant(U32[1], 8)), "array_8_f32"); + } } void EmitContext::DefineCommonConstants() { @@ -502,6 +505,12 @@ void EmitContext::DefineOutputs(const Info& info) { } 
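// A hedged GLSL analogy for the clip distance plumbing added in this patch:
// the output defined below behaves like `out float gl_ClipDistance[8];`, and
// the OutputAttrPointer case added above turns an attribute enum back into an
// array index, so a store to IR::Attribute::ClipDistance3 is equivalent to
//   gl_ClipDistance[3] = value;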
output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); } + if (info.stores_clip_distance) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ClipDistance in Fragment stage"); + } + clip_distances = DefineOutput(*this, Array8F32, spv::BuiltIn::ClipDistance); + } for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (info.stores_generics[i]) { output_generics[i] = DefineOutput(*this, F32[4]); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 3965869f0..bffe1558c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -67,6 +67,8 @@ public: VectorTypes F16; VectorTypes F64; + Id Array8F32{}; + Id true_value{}; Id false_value{}; Id u32_zero_value{}; @@ -105,6 +107,7 @@ public: Id base_vertex{}; Id front_face{}; Id point_coord{}; + Id clip_distances{}; Id fswzadd_lut_a{}; Id fswzadd_lut_b{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d02761f32..2eaeb29de 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -44,6 +44,19 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionZ: case IR::Attribute::PositionW: return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; + const u32 index{static_cast<u32>(attr) - base}; + const Id clip_num{ctx.Constant(ctx.U32[1], index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); + } default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 730d3e91e..50ffc4c19 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -71,6 +71,16 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.stores_position = true; break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: + info.stores_clip_distance = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index c9f6d9ef7..a62ad1e79 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -81,6 +81,7 @@ struct Info { std::array<bool, 32> stores_generics{}; bool stores_position{}; bool stores_point_size{}; + bool stores_clip_distance{}; bool uses_fp16{}; bool uses_fp64{}; From 09e1927b702b6a57fdea3c137b8e08c1ecd06b4f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 18:39:42 -0300 Subject: [PATCH 145/770] spirv: Remove unnecessary
variable for clip distances --- src/shader_recompiler/backend/spirv/emit_context.cpp | 6 ++---- src/shader_recompiler/backend/spirv/emit_context.h | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2e3e3346d..b0f7e2269 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -176,9 +176,6 @@ void EmitContext::DefineCommonTypes(const Info& info) { AddCapability(spv::Capability::Float64); F64.Define(*this, TypeFloat(64), "f64"); } - if (info.stores_clip_distance) { - Array8F32 = Name(TypeArray(F32[1], Constant(U32[1], 8)), "array_8_f32"); - } } void EmitContext::DefineCommonConstants() { @@ -509,7 +506,8 @@ void EmitContext::DefineOutputs(const Info& info) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in Fragment stage"); } - clip_distances = DefineOutput(*this, Array8F32, spv::BuiltIn::ClipDistance); + const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; + clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); } for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (info.stores_generics[i]) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index bffe1558c..dc4e1227a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -67,8 +67,6 @@ public: VectorTypes F16; VectorTypes F64; - Id Array8F32{}; - Id true_value{}; Id false_value{}; Id u32_zero_value{}; From 0c933e20dec02e12a4644281b9b7bf9716a5cbb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 21:28:00 -0300 Subject: [PATCH 146/770] vk_pipeline_cache: Name SPIR-V modules --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb4df9000..30a707599 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -777,6 +777,11 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], + key.unique_hashes[index][1])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, @@ -836,8 +841,13 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(base_profile, program, binding)}; + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + spv_module.SetObjectNameEXT(name.c_str()); + } return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - BuildShader(device, code)}; + 
std::move(spv_module)}; } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { From c826220733678198e9aef328a9808b062b06c5df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 01:06:17 -0300 Subject: [PATCH 147/770] shader: Unroll "using enum" for opcode declarations --- src/shader_recompiler/frontend/ir/opcodes.cpp | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 8492a13d5..1cb9db6c9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -17,7 +17,33 @@ struct OpcodeMeta { std::array arg_types; }; -using enum Type; +// using enum Type; +constexpr Type Void{Type::Void}; +constexpr Type Opaque{Type::Opaque}; +constexpr Type Label{Type::Label}; +constexpr Type Reg{Type::Reg}; +constexpr Type Pred{Type::Pred}; +constexpr Type Attribute{Type::Attribute}; +constexpr Type U1{Type::U1}; +constexpr Type U8{Type::U8}; +constexpr Type U16{Type::U16}; +constexpr Type U32{Type::U32}; +constexpr Type U64{Type::U64}; +constexpr Type F16{Type::F16}; +constexpr Type F32{Type::F32}; +constexpr Type F64{Type::F64}; +constexpr Type U32x2{Type::U32x2}; +constexpr Type U32x3{Type::U32x3}; +constexpr Type U32x4{Type::U32x4}; +constexpr Type F16x2{Type::F16x2}; +constexpr Type F16x3{Type::F16x3}; +constexpr Type F16x4{Type::F16x4}; +constexpr Type F32x2{Type::F32x2}; +constexpr Type F32x3{Type::F32x3}; +constexpr Type F32x4{Type::F32x4}; +constexpr Type F64x2{Type::F64x2}; +constexpr Type F64x3{Type::F64x3}; +constexpr Type F64x4{Type::F64x4}; constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ From eaafd53cfedf0c7ae40a3f790af5f0aec63ebd13 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 19:46:10 -0300 Subject: [PATCH 148/770] shader: Implement LDG .U.128 as .128 --- .../frontend/maxwell/translate/impl/load_store_memory.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 748b856c9..71688b1d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -118,7 +118,8 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::B128: { + case LoadSize::B128: + case LoadSize::U128: { if (!IR::IsAligned(dest_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } @@ -128,8 +129,6 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::U128: - throw NotImplementedException("LDG U.128"); default: throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); } From 5f22cd89e20cd39c1395b7bd3b8e667a40f53751 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 19:46:38 -0300 Subject: [PATCH 149/770] shader: Fix constant propagation to use reverse post order --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 052f1609b..7da4d50ef 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -5,6 
+5,7 @@ #include #include #include +#include <ranges> #include "common/bit_cast.h" #include "common/bit_util.h" @@ -424,7 +425,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } From f1dd743731bd0e7b7f1ef172882971bcd15eb5bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:07:51 -0300 Subject: [PATCH 150/770] shader: Fix dependency on identity removal pass --- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b0f7e2269..3b3fea50c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -114,7 +114,7 @@ EmitContext::~EmitContext() = default; Id EmitContext::Def(const IR::Value& value) { if (!value.IsImmediate()) { - return value.Inst()->Definition(); + return value.InstRecursive()->Definition(); } switch (value.Type()) { case IR::Type::Void: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 6389d80bf..9dc769307 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -250,7 +250,7 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { // Let the context handle immediate definitions, as it already knows how return ctx.Def(arg); } - IR::Inst* const arg_inst{arg.Inst()}; + IR::Inst* const arg_inst{arg.InstRecursive()}; if (const Id def{arg_inst->Definition()}; Sirit::ValidId(def)) { // Return the current definition if it exists return def; @@ -296,7 +296,12 @@ Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { void EmitVoid(EmitContext&) {} Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { - return ctx.Def(value); + if (const Id id = ctx.Def(value); Sirit::ValidId(id)) { + return id; + } + const Id def{ctx.ForwardDeclarationId()}; + value.InstRecursive()->SetDefinition(def); + return def; } void EmitGetZeroFromOp(EmitContext&) { From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:36:22 -0300 Subject: [PATCH 151/770] vulkan: Build pipelines in parallel at runtime Wait for the worker thread to finish building a pipeline before binding it to the command buffer. This allows pipeline builds to be queued to multiple threads.
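A self-contained sketch of the synchronization pattern this commit introduces (the type and function names are illustrative, not taken from the codebase): the worker thread creates the pipeline and raises a C++20 std::atomic_flag, and the render thread blocks on that flag only if it reaches the bind before the build has finished.

#include <atomic>
#include <thread>

struct Pipeline {
    std::atomic_flag built{}; // default-initialized to clear since C++20

    void Build() {
        // ... expensive vkCreateGraphicsPipelines/vkCreateComputePipelines work ...
        built.test_and_set(); // publish completion
        built.notify_all();   // wake any thread blocked in Bind()
    }

    void Bind() {
        if (!built.test()) {
            built.wait(false); // sleep until Build() sets the flag
        }
        // ... record vkCmdBindPipeline on the command buffer ...
    }
};

int main() {
    Pipeline pipeline;
    std::thread worker{[&] { pipeline.Build(); }};
    pipeline.Bind(); // waits only when the worker has not finished yet
    worker.join();
}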
--- .../renderer_vulkan/vk_compute_pipeline.cpp | 95 +++++++++++-------- .../renderer_vulkan/vk_compute_pipeline.h | 30 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 72 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 31 +++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 78 +++++++-------- .../renderer_vulkan/vk_pipeline_cache.h | 24 +++-- .../renderer_vulkan/vk_rasterizer.cpp | 15 +-- .../renderer_vulkan/vk_scheduler.cpp | 10 +- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +- 9 files changed, 197 insertions(+), 165 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e544d745..1c3249e3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + Common::ThreadWorker* thread_worker, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { DescriptorLayoutTuple tuple{CreateLayout(device, info)}; descriptor_set_layout = std::move(tuple.descriptor_set_layout); @@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + auto func{[this, &device] { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } } -void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); - ++index; + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); -} -void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, - TextureCache& texture_cache) { texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; @@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t index{}; + size_t image_index{}; PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - *update_descriptor_queue, index); -} + update_descriptor_queue, image_index); -VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + if (!building_flag.test()) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + }); + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - return descriptor_set; + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e82e5816b..02da504f7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,10 @@ #pragma once +#include + #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -16,36 +19,26 @@ namespace Vulkan { class Device; +class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& 
descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - const Shader::Info& info, vk::ShaderModule spv_module); + Common::ThreadWorker* thread_worker, const Shader::Info& info, + vk::ShaderModule spv_module); - ComputePipeline& operator=(ComputePipeline&&) noexcept = default; - ComputePipeline(ComputePipeline&&) noexcept = default; + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + ComputePipeline(ComputePipeline&&) noexcept = delete; ComputePipeline& operator=(const ComputePipeline&) = delete; ComputePipeline(const ComputePipeline&) = delete; - void ConfigureBufferCache(BufferCache& buffer_cache); - void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); - - [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); - - [[nodiscard]] VkPipeline Handle() const noexcept { - return *pipeline; - } - - [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { - return *pipeline_layout; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - VKUpdateDescriptorQueue* update_descriptor_queue; + VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; vk::ShaderModule spv_module; @@ -54,6 +47,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 278509bf0..ddc08b8c4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, BufferCache& buffer_cache_, TextureCache& texture_cache_, const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state, + const FixedPipelineState& state_, std::array stages, const std::array& infos) - : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, - buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, - update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ + std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); @@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; - MakePipeline(device, state, render_pass); + auto func{[this, &device, &render_pass_cache] { + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, render_pass); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); + } } void GraphicsPipeline::Configure(bool is_indexed) { @@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector image_view_indices; static_vector samplers; - texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeGraphicsDescriptors(); - const auto& regs{maxwell3d->regs}; + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache->UnbindGraphicsStorageBuffers(stage); + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); ++index; } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; for (const auto& desc : info.texture_descriptors) { const u32 cbuf_index{desc.cbuf_index}; const u32 cbuf_offset{desc.cbuf_offset}; ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory->Read(addr)}; + const u32 raw_handle{gpu_memory.Read(addr)}; const TextureHandle handle(raw_handle, via_header_index); image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache->UpdateGraphicsBuffers(is_indexed); - texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.UpdateGraphicsBuffers(is_indexed); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - buffer_cache->BindHostGeometryBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - buffer_cache->BindHostStageBuffers(stage); + buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - *texture_cache, *update_descriptor_queue, index); + texture_cache, update_descriptor_queue, index); } - texture_cache->UpdateRenderTargets(false); - scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); - - 
scheduler->BindGraphicsPipeline(*pipeline); + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + if (!building_flag.test()) { + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + if (scheduler.UpdateGraphicsPipeline(this)) { + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + }); + } if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); - scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba1d34a83..4e0583157 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,13 +5,15 @@ #pragma once #include +#include +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -25,34 +27,34 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline() = default; explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, - TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + BufferCache& buffer_cache, TextureCache& texture_cache, + const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const FixedPipelineState& state, std::array stages, const std::array& infos); void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; - GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; private: - void MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass); + void MakePipeline(const Device& device, VkRenderPass render_pass); - Tegra::Engines::Maxwell3D* maxwell3d{}; - Tegra::MemoryManager* gpu_memory{}; - 
TextureCache* texture_cache{}; - BufferCache* buffer_cache{}; - VKScheduler* scheduler{}; - VKUpdateDescriptorQueue* update_descriptor_queue{}; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; + TextureCache& texture_cache; + BufferCache& buffer_cache; + VKScheduler& scheduler; + VKUpdateDescriptorQueue& update_descriptor_queue; + const FixedPipelineState state; std::array spv_modules; std::array stage_infos; @@ -61,6 +63,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30a707599..e3d9debf4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - Common::ThreadWorker worker(11, "PipelineBuilder"); - std::mutex cache_mutex; struct { + std::mutex mutex; size_t total{0}; size_t built{0}; bool has_loaded{false}; @@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } u32 num_envs{}; file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - auto envs{std::make_shared>(num_envs)}; - for (FileEnvironment& env : *envs) { + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { env.Deserialize(file); } - if (envs->front().ShaderStage() == Shader::Stage::Compute) { + if (envs.front().ShaderStage() == Shader::Stage::Compute) { ComputePipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; - ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; compute_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; - for (auto& env : *envs) { + for (auto& env : envs) { env_ptrs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } ++state.total; } { - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); state.has_loaded = true; } - 
worker.WaitForRequests(); + workers.WaitForRequests(); } size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -662,10 +663,10 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateGraphicsPipeline(); - return &pipeline; + return pipeline.get(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateComputePipeline(key, shader); - return &pipeline; + return pipeline.get(); } bool PipelineCache::RefreshStages() { @@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, - const GraphicsPipelineCacheKey& key, - std::span envs) { +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, modules[stage_index].SetObjectNameEXT(name.c_str()); } } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, - std::move(modules), infos); + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, + update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), + infos); } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; @@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { generic_envs.push_back(&env); envs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader) { +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(key, std::array{&env}, pipeline_cache_filename); @@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineCacheKey& key, - Shader::Environment& env) const { +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; spv_module.SetObjectNameEXT(name.c_str()); } - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - std::move(spv_module)}; + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique(device, descriptor_pool, update_descriptor_queue, + thread_worker, program.info, std::move(spv_module)); } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b55e14189..609f00898 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,6 +14,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -145,16 +146,19 @@ private: const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - GraphicsPipeline CreateGraphicsPipeline(); + std::unique_ptr CreateGraphicsPipeline(); - GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs); + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel); - ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, - Shader::Environment& env) const; + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool build_in_parallel); Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); @@ -174,13 +178,15 @@ private: GraphicsPipelineCacheKey graphics_key{}; std::array shader_infos{}; - std::unordered_map compute_cache; - std::unordered_map graphics_cache; + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; std::string pipeline_cache_filename; + + Common::ThreadWorker workers; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7d9927dd..f0bd4b8af 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - update_descriptor_queue.Acquire(); - pipeline->ConfigureBufferCache(buffer_cache); - pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); - const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; - const VkPipeline pipeline_handle{pipeline->Handle()}; - const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; - scheduler.Record( - [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, - descriptor_set, nullptr); - cmdbuf.Dispatch(dim[0], dim[1], dim[2]); - }); + 
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..25a4933e5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + Common::SetCurrentThreadName("yuzu:VulkanWorker"); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..a40bb8bcd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Updates the graphics pipeline of the current execution context; returns true when it changed.
+ bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -170,7 +171,7 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); From 8771639d1e97cf2224657c0d2ee87d800a784ac8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 03:15:28 -0300 Subject: [PATCH 152/770] vulkan: Create pipeline layouts in separate threads --- .../renderer_vulkan/pipeline_helper.h | 72 ++++++++++--------- .../renderer_vulkan/vk_compute_pass.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 ++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 26 +++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_update_descriptor.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- 7 files changed, 65 insertions(+), 63 deletions(-) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 0a59aa659..eebe5d569 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -35,49 +35,52 @@ struct TextureHandle { u32 sampler; }; -struct DescriptorLayoutTuple { - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::DescriptorUpdateTemplateKHR descriptor_update_template; -}; - class DescriptorLayoutBuilder { public: - DescriptorLayoutTuple Create(const vk::Device& device) { - DescriptorLayoutTuple result; - if (!bindings.empty()) { - result.descriptor_set_layout = device.CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); + DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + + vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + if (bindings.empty()) { + return nullptr; } - result.pipeline_layout = device.CreatePipelineLayout({ + return device->CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) const { + if (entries.empty()) { + return nullptr; + } + return device->CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, - .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .setLayoutCount = descriptor_set_layout ? 
1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }); - if (!entries.empty()) { - result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *result.descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *result.pipeline_layout, - .set = 0, - }); - } - return result; } void Add(const Shader::Info& info, VkShaderStageFlags stage) { @@ -113,6 +116,7 @@ private: offset += sizeof(DescriptorUpdateEntry); } + const vk::Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a444d55d3..760857839 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -237,7 +237,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return nullptr; } const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); + update_descriptor_queue.Send(descriptor_template.address(), set); return set; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1c3249e3c..fb19bb4b9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -17,13 +17,6 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { -DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { - DescriptorLayoutBuilder builder; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - return builder.Create(device.GetLogical()); -} -} // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, @@ -31,10 +24,12 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip vk::ShaderModule spv_module_) : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutTuple tuple{CreateLayout(device, info)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); auto func{[this, &device] { @@ -128,7 +123,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + 
update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ddc08b8c4..d17b79e02 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -27,8 +27,8 @@ using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder; +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -39,7 +39,7 @@ DescriptorLayoutTuple CreateLayout(const Device& device, std::span @@ -124,13 +124,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - auto func{[this, &device, &render_pass_cache] { + auto func{[this, &device, &render_pass_cache, builder] { + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); building_flag.test_and_set(); @@ -206,11 +208,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, - nullptr); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e3d9debf4..597261964 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -620,7 +620,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, 
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + workers(11, "yuzu:PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..bea9b8012 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,12 +36,12 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, +void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set) { const void* const data = upload_start; const vk::Device* const logical = &device.GetLogical(); scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); + logical->UpdateDescriptorSet(set, *update_template, data); }); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..82bc9920c 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,7 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ From d0a529683a2e5a693b53c6f24f6816c06f8f7e65 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 04:09:09 -0300 Subject: [PATCH 153/770] vulkan: Serialize pipelines on a separate thread --- .../renderer_vulkan/vk_pipeline_cache.cpp | 128 +++++++++--------- .../renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 63 insertions(+), 66 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 597261964..79cd204c7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -61,6 +61,33 @@ public: ~GenericEnvironment() override = default; + u32 TextureBoundBuffer() const final { + return texture_bound; + } + + u32 LocalMemorySize() const final { + return local_memory_size; + } + + u32 SharedMemorySize() const final { + return shared_memory_size; + } + + std::array WorkgroupSize() const final { + return workgroup_size; + } + + u64 ReadInstruction(u32 address) final { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); + } + std::optional Analyze() { const std::optional size{TryFindSize()}; if (!size) { @@ -97,26 +124,10 @@ public: return Common::CityHash128(data.get(), size); } - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, 
address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(ReadSize())}; - const auto data{std::make_unique(code_size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); - + const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - const u32 local_memory_size{LocalMemorySize()}; - const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) @@ -124,10 +135,10 @@ public: .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) - .write(data.get(), code_size); + .write(reinterpret_cast(code.data()), code_size); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -137,8 +148,6 @@ public: .write(reinterpret_cast(&type), sizeof(type)); } if (stage == Shader::Stage::Compute) { - const std::array workgroup_size{WorkgroupSize()}; - const u32 shared_memory_size{SharedMemorySize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -220,6 +229,11 @@ protected: std::unordered_map texture_types; std::unordered_map cbuf_values; + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -270,6 +284,10 @@ public: UNREACHABLE_MSG("Invalid program={}", program); break; } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; } ~GraphicsEnvironment() override = default; @@ -294,24 +312,6 @@ public: cbuf.address, cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return maxwell3d->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const u64 size{sph.LocalMemorySize()}; - ASSERT(size <= std::numeric_limits::max()); - return static_cast(size); - } - - u32 SharedMemorySize() const override { - throw Shader::LogicError("Requesting shared memory size in graphics stage"); - } - - std::array WorkgroupSize() const override { - throw Shader::LogicError("Requesting workgroup size in a graphics stage"); - } - private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -325,7 +325,12 @@ public: u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = 
kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } ~ComputeEnvironment() override = default; @@ -351,25 +356,6 @@ public: cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return kepler_compute->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.local_pos_alloc; - } - - u32 SharedMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.shared_alloc; - } - - std::array WorkgroupSize() const override { - const auto& qmd{kepler_compute->launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -621,7 +607,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder") { + workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -796,7 +782,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; - boost::container::static_vector generic_envs; boost::container::static_vector envs; const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; @@ -810,13 +795,22 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - generic_envs.push_back(&env); envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; - if (!pipeline_cache_filename.empty()) { - SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); + if (pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + boost::container::static_vector + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != u128{}) { + env_ptrs.push_back(&envs[index]); + } + } + SerializePipeline(key, env_ptrs, pipeline_cache_filename); + }); return pipeline; } @@ -830,8 +824,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); } return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 609f00898..343ea1554 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -187,6 +187,7 @@ private: std::string pipeline_cache_filename; Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; }; } // namespace Vulkan From 
b4a5e767d0a60d44c77460bd3a4062c5f69fb6c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Apr 2021 01:17:47 -0300 Subject: [PATCH 154/770] shader: Fix branches to visited virtual blocks --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 ++ .../frontend/maxwell/control_flow.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9dc769307..b8e3b8527 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -56,6 +56,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.Label(); } else if constexpr (std::is_same_v) { return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Reg(); } } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 784f9df8a..ac8707847 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -486,6 +486,16 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or if it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node is impersonated by its virtual node. + const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } return &*it; } Block* const new_block{block_pool.Create(Block{ From c3bace756f2b21057e89d104ad18a34b2ad9083c Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 06:39:47 +0200 Subject: [PATCH 155/770] shader: Fold comparisons and Pack/Unpack16 --- .../ir_opt/constant_propagation_pass.cpp | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7da4d50ef..15e16956e 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included.
#include +#include #include #include -#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -332,6 +332,18 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { } } +void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { + const IR::Value value{inst.Arg(0)}; + if (value.IsImmediate()) { + return; + } + IR::Inst* const arg_inst{value.InstRecursive()}; + if (arg_inst->Opcode() == reverse) { + inst.ReplaceUsesWith(arg_inst->Arg(0)); + return; + } +} + template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; @@ -372,6 +384,10 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); + case IR::Opcode::PackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); + case IR::Opcode::UnpackHalf2x16: + return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); case IR::Opcode::SelectU1: case IR::Opcode::SelectU8: case IR::Opcode::SelectU16: @@ -395,6 +411,30 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ULessThan: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); return; + case IR::Opcode::SLessThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); + return; + case IR::Opcode::SGreaterThan: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); + return; + case IR::Opcode::IEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); + return; + case IR::Opcode::INotEqual: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); + return; case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { From bee81887998070d213c984d4ae4e5ae35de6fd96 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 07:42:58 +0200 Subject: [PATCH 156/770] shader: Fold composite extract --- .../ir_opt/constant_propagation_pass.cpp | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 15e16956e..8999c3a3d 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -368,6 +368,50 @@ void FoldBranchConditional(IR::Inst& inst) { } } +std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, + IR::Opcode construct, u32 first_index) { + IR::Inst* const inst{inst_value.InstRecursive()}; + if (inst->Opcode() == construct) { + return inst->Arg(first_index); + } + + if (inst->Opcode() != insert) { + return std::nullopt; + } + + IR::Value value_index{inst->Arg(2)}; + if (!value_index.IsImmediate()) { + return std::nullopt; + } + + const u32 second_index = value_index.U32(); + if (first_index != second_index) { + IR::Value value_composite{inst->Arg(0)}; + if (value_composite.IsImmediate()) { + return std::nullopt; 
+ } + return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); + } + return inst->Arg(1); +} + +void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { + const IR::Value value_1{inst.Arg(0)}; + const IR::Value value_2{inst.Arg(1)}; + if (value_1.IsImmediate()) { + return; + } + if (!value_2.IsImmediate()) { + return; + } + const u32 first_index = value_2.U32(); + auto result = FoldCompositeExtractImpl(value_1, insert, construct, first_index); + if (!result) { + return; + } + inst.ReplaceUsesWith(*result); +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -458,6 +502,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); + case IR::Opcode::CompositeExtractF32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, + IR::Opcode::CompositeInsertF32x2); + case IR::Opcode::CompositeExtractF32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, + IR::Opcode::CompositeInsertF32x3); + case IR::Opcode::CompositeExtractF32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, + IR::Opcode::CompositeInsertF32x4); + case IR::Opcode::CompositeExtractF16x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, + IR::Opcode::CompositeInsertF16x2); + case IR::Opcode::CompositeExtractF16x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, + IR::Opcode::CompositeInsertF16x3); + case IR::Opcode::CompositeExtractF16x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, + IR::Opcode::CompositeInsertF16x4); default: break; } From 5ed8f2438498d3281c2ce8621869995de3908413 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 07:52:36 +0200 Subject: [PATCH 157/770] shader: Stub VOTE.VTG --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 7 +++++++ .../maxwell/translate/impl/move_special_register.cpp | 4 ++++ .../frontend/maxwell/translate/impl/not_implemented.cpp | 4 ---- .../frontend/maxwell/translate/impl/vote.cpp | 4 ++++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index edf8c05d4..5258ede09 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -256,6 +256,13 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::CSM_TA: + case FlowTest::CSM_TR: + case FlowTest::CSM_MX: + case FlowTest::FCSM_TA: + case FlowTest::FCSM_TR: + case FlowTest::FCSM_MX: + return ir.Imm1(false); default: throw NotImplementedException("Flow test {}", flow_test); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 93cea302a..a295f4c5e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -95,6 +95,10 @@ enum class SpecialRegister : u64 { return ir.WorkgroupIdY(); case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + 
return ir.Imm32(Common::BitCast(1.0f)); + case SpecialRegister::SR_WSCALEFACTOR_Z: + return ir.Imm32(Common::BitCast(1.0f)); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba526817a..83ed0c0fd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -329,10 +329,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VOTE_vtg(u64) { - ThrowNotImplemented(Opcode::VOTE_vtg); -} - void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index a88894a7e..391520a18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -49,4 +49,8 @@ void TranslatorVisitor::VOTE(u64 insn) { Vote(*this, insn); } +void TranslatorVisitor::VOTE_vtg(u64) { + // Stub +} + } // namespace Shader::Maxwell From fd496d0401e601d3fec4d38208bcf52410e3b74c Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:25:55 +0200 Subject: [PATCH 158/770] shader: Stub TLD4's PTP when it isn't constant --- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index bc7de509b..fc40615af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -40,7 +40,8 @@ public: } const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { - throw NotImplementedException("Not all arguments in PTP are immediate"); + // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); + return; } const IR::Opcode opcode{values[0]->Opcode()}; if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { From d819ba4489b90955286341c739083e638173b938 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:34:45 +0200 Subject: [PATCH 159/770] shader: Implement ViewportIndex --- src/shader_recompiler/backend/spirv/emit_context.cpp | 6 ++++++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 10 ++++++++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 10 ++++++++-- .../ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 8 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3b3fea50c..a8041aadc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -509,6 +509,12 @@ void EmitContext::DefineOutputs(const Info& info) { const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); } + if 
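[Note] The SR_WSCALEFACTOR_* stubs return 1.0f through Common::BitCast because S2R writes a raw 32-bit register: the immediate must carry the IEEE-754 encoding of the float, not the integer 1. The C++20 equivalent:

    #include <bit>
    #include <cstdint>

    // The register receives 0x3f800000, the bit pattern of 1.0f.
    constexpr std::uint32_t one_as_bits{std::bit_cast<std::uint32_t>(1.0f)};
    static_assert(one_as_bits == 0x3f800000u);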
(info.stores_viewport_index && !ignore_viewport_layer) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ViewportIndex in Fragment stage"); + } + viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); + } for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (info.stores_generics[i]) { output_generics[i] = DefineOutput(*this, F32[4]); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index dc4e1227a..1573c2560 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -106,6 +106,7 @@ public: Id front_face{}; Id point_coord{}; Id clip_distances{}; + Id viewport_index{}; Id fswzadd_lut_a{}; Id fswzadd_lut_b{}; @@ -133,6 +134,8 @@ public: std::vector interfaces; + bool ignore_viewport_layer{}; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index b8e3b8527..cc6b98f7e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -225,6 +225,16 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } + if (info.stores_viewport_index) { + ctx.AddCapability(spv::Capability::MultiViewport); + if (profile.support_viewport_index_layer_non_geometry && + ctx.stage == Shader::Stage::VertexB) { + ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); + ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); + } else { + ctx.ignore_viewport_layer = true; + } + } if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2eaeb29de..e42407f1f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -29,7 +29,7 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } -Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { +std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 element{static_cast(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { @@ -57,6 +57,8 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id clip_num{ctx.Constant(ctx.U32[1], index)}; return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); } + case IR::Attribute::ViewportIndex: + return ctx.ignore_viewport_layer ? 
std::nullopt : std::optional{ctx.viewport_index}; default: throw NotImplementedException("Read attribute {}", attr); } @@ -204,7 +206,11 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { - ctx.OpStore(OutputAttrPointer(ctx, attr), value); + auto output = OutputAttrPointer(ctx, attr); + if (!output) { + return; + } + ctx.OpStore(*output, value); } void EmitGetAttributeIndexed(EmitContext&) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 50ffc4c19..514de6838 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -81,6 +81,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; + case IR::Attribute::ViewportIndex: + info.stores_viewport_index = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f4b94896c..e13cb948a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -35,6 +35,7 @@ struct Profile { bool support_fp64_signed_zero_nan_preserve{}; bool support_explicit_workgroup_layout{}; bool support_vote{}; + bool support_viewport_index_layer_non_geometry{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a62ad1e79..a5bff40bb 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -82,6 +82,7 @@ struct Info { bool stores_position{}; bool stores_point_size{}; bool stores_clip_distance{}; + bool stores_viewport_index{}; bool uses_fp16{}; bool uses_fp64{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 79cd204c7..1f308eec2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -631,6 +631,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, + .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, From 12f5f320985824d1ebad587ebecb0f8406143ebc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 09:21:53 +0200 Subject: [PATCH 160/770] shader: Mark SSBOs as written when they are --- .../global_memory_to_storage_buffer_pass.cpp | 31 +++++++++++++++++-- src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 1faa1ec88..d4bae249b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ 
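[Note] Because ViewportIndex may be unsupported on the current device, OutputAttrPointer now returns std::optional<Id> and EmitSetAttribute above drops the store when the pointer is absent. The shape of that pattern, reduced to standalone C++ (Id replaced by a placeholder, the pointer value is hypothetical):

    #include <optional>

    using Id = unsigned; // stand-in for the SPIR-V emitter's Id handle

    std::optional<Id> OutputAttrPointer(bool attribute_supported) {
        if (!attribute_supported) {
            return std::nullopt; // e.g. ViewportIndex without the extension
        }
        return Id{42};           // hypothetical access-chain result
    }

    void SetAttribute(bool attribute_supported) {
        const std::optional<Id> output{OutputAttrPointer(attribute_supported)};
        if (!output) {
            return; // the write is silently discarded, matching the stub's intent
        }
        // the real emitter stores through *output with OpStore
    }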
b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ using StorageBufferSet = using StorageInstVector = boost::container::small_vector; using VisitedBlocks = boost::container::flat_set, boost::container::small_vector>; +using StorageWritesMap = std::map; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -69,6 +71,22 @@ bool IsGlobalMemory(const IR::Inst& inst) { } } +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemoryWrite(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + /// Converts a global memory opcode to its storage buffer equivalent IR::Opcode GlobalToStorage(IR::Opcode opcode) { switch (opcode) { @@ -248,7 +266,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, /// Collects the storage buffer used by a global memory instruction and the instruction itself void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace) { + StorageInstVector& to_replace, StorageWritesMap& writes_map) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -277,6 +295,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction + const bool is_a_write = IsGlobalMemoryWrite(inst); + auto it = writes_map.find(*storage_buffer); + if (it == writes_map.end()) { + writes_map[*storage_buffer] = is_a_write; + } else { + it->second = it->second || is_a_write; + } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, @@ -350,13 +375,14 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; + StorageWritesMap writes_map; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_map); } } Info& info{program.info}; @@ -366,6 +392,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, + .is_written{writes_map[storage_buffer]}, }); ++storage_index; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a5bff40bb..d4d039eaf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -59,6 +59,7 @@ struct StorageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + bool is_written; }; struct Info { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index fb19bb4b9..6707842ab 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ 
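[Note] CollectStorageBuffers above ORs each access's write flag into a per-buffer entry, so a buffer is marked written if any access to it writes. The find-then-insert sequence is equivalent to a try_emplace (a sketch, assuming an ordered key type):

    #include <map>

    struct StorageBufferAddr {
        unsigned index{};
        unsigned offset{};
        auto operator<=>(const StorageBufferAddr&) const = default;
    };

    using StorageWritesMap = std::map<StorageBufferAddr, bool>;

    // Record one access: once true, the flag stays true.
    void MarkAccess(StorageWritesMap& writes, const StorageBufferAddr& buffer, bool is_write) {
        const auto [it, inserted]{writes.try_emplace(buffer, is_write)};
        if (!inserted) {
            it->second = it->second || is_write;
        }
    }

This flag is what lets the Vulkan pipelines below bind each SSBO with desc.is_written instead of a blanket true, keeping read-only storage buffers out of the written-memory tracking.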
b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -75,7 +75,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d17b79e02..e8c3a5624 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -163,7 +163,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + desc.is_written); ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; From ecb30c907266921818d5b6b03e341028fa2ea082 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 22:20:57 +0200 Subject: [PATCH 161/770] shader: Improve VOTE.VTG stub --- .../backend/spirv/emit_spirv.h | 8 +++ .../spirv/emit_spirv_context_get_set.cpp | 32 ++++++++++++ .../frontend/ir/ir_emitter.cpp | 37 +++++++++++++- .../frontend/ir/ir_emitter.h | 10 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 8 +++ .../frontend/maxwell/translate/impl/vote.cpp | 5 +- .../ir_opt/ssa_rewrite_pass.cpp | 51 ++++++++++++++++++- 7 files changed, 147 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 9c9e0c5dd..d2eda1f8e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -59,6 +59,14 @@ void EmitSetZFlag(EmitContext& ctx); void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); +void EmitGetFCSMFlag(EmitContext& ctx); +void EmitGetTAFlag(EmitContext& ctx); +void EmitGetTRFlag(EmitContext& ctx); +void EmitGetMXFlag(EmitContext& ctx); +void EmitSetFCSMFlag(EmitContext& ctx); +void EmitSetTAFlag(EmitContext& ctx); +void EmitSetTRFlag(EmitContext& ctx); +void EmitSetMXFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e42407f1f..a96ee6f0d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -263,6 +263,38 @@ void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitGetFCSMFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetTAFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetTRFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetMXFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetFCSMFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void 
EmitSetTAFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetTRFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSetMXFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5258ede09..ddaa873f2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -198,6 +198,38 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::GetFCSMFlag() { + return Inst(Opcode::GetFCSMFlag); +} + +U1 IREmitter::GetTAFlag() { + return Inst(Opcode::GetTAFlag); +} + +U1 IREmitter::GetTRFlag() { + return Inst(Opcode::GetTRFlag); +} + +U1 IREmitter::GetMXFlag() { + return Inst(Opcode::GetMXFlag); +} + +void IREmitter::SetFCSMFlag(const U1& value) { + Inst(Opcode::SetFCSMFlag, value); +} + +void IREmitter::SetTAFlag(const U1& value) { + Inst(Opcode::SetTAFlag, value); +} + +void IREmitter::SetTRFlag(const U1& value) { + Inst(Opcode::SetTRFlag, value); +} + +void IREmitter::SetMXFlag(const U1& value) { + Inst(Opcode::SetMXFlag, value); +} + static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { case FlowTest::F: @@ -256,13 +288,14 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + + case FlowTest::FCSM_TR: + return ir.LogicalAnd(ir.GetFCSMFlag(), ir.GetTRFlag()); case FlowTest::CSM_TA: case FlowTest::CSM_TR: case FlowTest::CSM_MX: case FlowTest::FCSM_TA: - case FlowTest::FCSM_TR: case FlowTest::FCSM_MX: - return ir.Imm1(false); default: throw NotImplementedException("Flow test {}", flow_test); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index a4616e247..6e04eec7f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -70,6 +70,16 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 GetFCSMFlag(); + [[nodiscard]] U1 GetTAFlag(); + [[nodiscard]] U1 GetTRFlag(); + [[nodiscard]] U1 GetMXFlag(); + + void SetFCSMFlag(const U1& value); + void SetTAFlag(const U1& value); + void SetTRFlag(const U1& value); + void SetMXFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 GetFlowTestResult(FlowTest test); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ffd0cc690..702372775 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -46,10 +46,18 @@ OPCODE(GetZFlag, U1, Void OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) OPCODE(GetOFlag, U1, Void, ) +OPCODE(GetFCSMFlag, U1, Void, ) +OPCODE(GetTAFlag, U1, Void, ) +OPCODE(GetTRFlag, U1, Void, ) +OPCODE(GetMXFlag, U1, Void, ) OPCODE(SetZFlag, Void, U1, ) OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +OPCODE(SetFCSMFlag, Void, U1, ) +OPCODE(SetTAFlag, Void, U1, ) +OPCODE(SetTRFlag, Void, U1, ) +OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 391520a18..2acabb662 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,7 +50,10 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // Stub + // LOG_WARNING("VOTE.VTG: Stubbed!"); + auto imm = ir.Imm1(false); + ir.SetFCSMFlag(imm); + ir.SetTRFlag(imm); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 259233746..7dab33034 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -38,6 +38,10 @@ struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct FCSMFlagTag : FlagTag {}; +struct TAFlagTag : FlagTag {}; +struct TRFlagTag : FlagTag {}; +struct MXFlagTag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -53,7 +57,8 @@ struct IndirectBranchVariable { }; using Variant = std::variant; + OverflowFlagTag, FCSMFlagTag, TAFlagTag, TRFlagTag, MXFlagTag, + GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -89,6 +94,22 @@ struct DefTable { return overflow_flag; } + [[nodiscard]] ValueMap& operator[](FCSMFlagTag) noexcept { + return fcsm_flag; + } + + [[nodiscard]] ValueMap& operator[](TAFlagTag) noexcept { + return ta_flag; + } + + [[nodiscard]] ValueMap& operator[](TRFlagTag) noexcept { + return tr_flag; + } + + [[nodiscard]] ValueMap& operator[](MXFlagTag) noexcept { + return mr_flag; + } + std::array regs; std::array preds; boost::container::flat_map goto_vars; @@ -97,6 +118,10 @@ struct DefTable { ValueMap sign_flag; ValueMap carry_flag; ValueMap overflow_flag; + ValueMap fcsm_flag; + ValueMap ta_flag; + ValueMap tr_flag; + ValueMap mr_flag; }; IR::Opcode UndefOpcode(IR::Reg) noexcept { @@ -247,6 +272,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetOFlag: pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; + case IR::Opcode::SetFCSMFlag: + pass.WriteVariable(FCSMFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetTAFlag: + pass.WriteVariable(TAFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetTRFlag: + pass.WriteVariable(TRFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetMXFlag: + pass.WriteVariable(MXFlagTag{}, block, inst.Arg(0)); + break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); @@ -275,6 +312,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; + case IR::Opcode::GetFCSMFlag: + inst.ReplaceUsesWith(pass.ReadVariable(FCSMFlagTag{}, block)); + break; + case IR::Opcode::GetTAFlag: + inst.ReplaceUsesWith(pass.ReadVariable(TAFlagTag{}, block)); + break; + case IR::Opcode::GetTRFlag: + inst.ReplaceUsesWith(pass.ReadVariable(TRFlagTag{}, block)); + break; + case IR::Opcode::GetMXFlag: + inst.ReplaceUsesWith(pass.ReadVariable(MXFlagTag{}, block)); + break; default: break; } From 655f7a570a10218ffb2ed175bb7f0b84530ccae0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 
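[Note] The SSA rewrite additions above reuse the pass's tag-dispatch scheme: every flag is an empty tag type, and DefTable overloads operator[] on the tag so the matching per-flag map is chosen during overload resolution rather than at runtime. A minimal sketch of the trick:

    #include <map>

    struct ZeroFlagTag {};
    struct CarryFlagTag {};

    struct DefTable {
        std::map<int, int>& operator[](ZeroFlagTag) noexcept { return zero_flag; }
        std::map<int, int>& operator[](CarryFlagTag) noexcept { return carry_flag; }

        std::map<int, int> zero_flag;
        std::map<int, int> carry_flag;
    };

WriteVariable and ReadVariable can then be templated on the tag and still reach the right map, e.g. table[ZeroFlagTag{}].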
19:27:30 +0200 Subject: [PATCH 162/770] shader: Implement MEMBAR --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 40 +++++++++++++ .../frontend/ir/ir_emitter.cpp | 4 ++ .../frontend/ir/ir_emitter.h | 2 + src/shader_recompiler/frontend/ir/modifiers.h | 13 +++++ src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../translate/impl/barrier_operations.cpp | 56 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 11 ---- 9 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 03a5793aa..181eac9f2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_barriers.cpp backend/spirv/emit_spirv_bitwise_conversion.cpp backend/spirv/emit_spirv_composite.cpp backend/spirv/emit_spirv_context_get_set.cpp @@ -63,6 +64,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/barrier_operations.cpp frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp frontend/maxwell/translate/impl/branch_indirect.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index d2eda1f8e..749ad1240 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -28,6 +28,7 @@ void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitMemoryBarrier(EmitContext& ctx, IR::Inst* inst); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..413ac25a0 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -0,0 +1,40 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
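[Note] The new emit_spirv_barriers.cpp (continued just below) always emits OpMemoryBarrier with one fixed semantics mask and varies only the scope. For reference, the mask arithmetic, with bit values taken from the SPIR-V specification:

    #include <cstdint>

    // MemorySemantics bits per the SPIR-V spec.
    constexpr std::uint32_t kAcquireRelease{0x0008};
    constexpr std::uint32_t kUniformMemory{0x0040};
    constexpr std::uint32_t kWorkgroupMemory{0x0100};
    constexpr std::uint32_t kAtomicCounterMemory{0x0400};
    constexpr std::uint32_t kImageMemory{0x0800};

    constexpr std::uint32_t kBarrierSemantics{kAcquireRelease | kUniformMemory | kWorkgroupMemory |
                                              kAtomicCounterMemory | kImageMemory};
    static_assert(kBarrierSemantics == 0x0d48);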
+ +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +spv::Scope MemoryScopeToSpirVScope(IR::MemoryScope scope) { + switch (scope) { + case IR::MemoryScope::Warp: + return spv::Scope::Subgroup; + case IR::MemoryScope::Workgroup: + return spv::Scope::Workgroup; + case IR::MemoryScope::Device: + return spv::Scope::Device; + case IR::MemoryScope::System: + return spv::Scope::CrossDevice; + case IR::MemoryScope::DontCare: + return spv::Scope::Invocation; + default: + throw NotImplementedException("Unknown memory scope!"); + } +} + +} // namespace + +void EmitMemoryBarrier(EmitContext& ctx, IR::Inst* inst) { + const auto info{inst->Flags()}; + const auto semantics = + spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | + spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | + spv::MemorySemanticsMask::ImageMemory; + const auto scope = MemoryScopeToSpirVScope(info.scope); + ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast(scope)), + ctx.Constant(ctx.U32[1], static_cast(semantics))); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ddaa873f2..2fd90303f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,10 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } +void IREmitter::MemoryBarrier(BarrierInstInfo info) { + Inst(Opcode::MemoryBarrier, Flags{info}); +} + void IREmitter::Return() { block->SetReturn(); Inst(Opcode::Return); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e04eec7f..5bebf66e3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -136,6 +136,8 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); + [[nodiscard]] void MemoryBarrier(BarrierInstInfo info); + template [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 90078f535..7730c25a9 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,6 +25,14 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; +enum class MemoryScope : u32 { + DontCare, + Warp, + Workgroup, + Device, + System +}; + struct FpControl { bool no_contraction{false}; FpRounding rounding{FpRounding::DontCare}; @@ -32,6 +40,11 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +union BarrierInstInfo { + u32 raw; + BitField<0, 3, MemoryScope> scope; +}; + union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 702372775..d9e0d5471 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -16,6 +16,9 @@ OPCODE(Return, Void, OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) +// Barriers +OPCODE(MemoryBarrier, Void, ) + // Special operations OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..933af572c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. +enum class LocalScope : u64 { + CTG = 0, + GL = 1, + SYS = 2, + VC = 3, +}; + +IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { + switch (scope) { + case LocalScope::CTG: + return IR::MemoryScope::Warp; + case LocalScope::GL: + return IR::MemoryScope::Device; + case LocalScope::SYS: + return IR::MemoryScope::System; + case LocalScope::VC: + return IR::MemoryScope::Workgroup; // or should be device? + default: + throw NotImplementedException("Unimplemented Local Scope {}", scope); + } +} + +} // namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } membar{inst}; + IR::BarrierInstInfo info{}; + info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); + ir.MemoryBarrier(info); +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64) { + throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 83ed0c0fd..80a6ed578 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -37,10 +37,6 @@ void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } -void TranslatorVisitor::BAR(u64) { - ThrowNotImplemented(Opcode::BAR); -} - void TranslatorVisitor::BPT(u64) { ThrowNotImplemented(Opcode::BPT); } @@ -73,9 +69,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DEPBAR() { - // DEPBAR is a no-op -} void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); @@ -189,10 +182,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::MEMBAR(u64) { - ThrowNotImplemented(Opcode::MEMBAR); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } From 595806fb1c81f0c57bb31d1f232d1447d4f61745 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 21:59:58 +0200 Subject: [PATCH 163/770] shader: Fix shared memory on cool drivers --- src/shader_recompiler/backend/spirv/emit_context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a8041aadc..794cd4ed8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -238,6 +238,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); 
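[Note] MEMBAR's scope selector (barrier_operations.cpp above) lives in bits [9:8] of the instruction word; BitField<8, 2, LocalScope> is the shift-and-mask below:

    #include <cstdint>

    enum class LocalScope : std::uint64_t { CTG = 0, GL = 1, SYS = 2, VC = 3 };

    constexpr LocalScope DecodeMembarScope(std::uint64_t insn) {
        return static_cast<LocalScope>((insn >> 8) & 0b11); // BitField<8, 2, LocalScope>
    }
    static_assert(DecodeMembarScope(1ULL << 9) == LocalScope::SYS);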
std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; From 45d547af11a18434ea17e4427db7286856a19537 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 23:05:47 +0200 Subject: [PATCH 164/770] shader: Implement SR_LaneId --- src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 1 + 7 files changed, 15 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 749ad1240..17a452e0e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -71,6 +71,7 @@ void EmitSetMXFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +Id EmitLaneId(EmitContext& ctx); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); Id EmitUndefU8(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index a96ee6f0d..f13c0ee72 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -303,6 +303,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitLaneId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 2fd90303f..b5f61956a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::LaneId() { + return Inst(Opcode::LaneId); +} + U32 IREmitter::LoadGlobalU8(const U64& address) { return Inst(Opcode::LoadGlobalU8, address); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5bebf66e3..e034d672f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -97,6 +97,8 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 LaneId(); + [[nodiscard]] U32 LoadGlobalU8(const U64& address); [[nodiscard]] U32 LoadGlobalS8(const U64& address); [[nodiscard]] U32 LoadGlobalU16(const U64& address); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d9e0d5471..74e956930 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ 
b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetTRFlag, Void, U1, OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(LaneId, U32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index a295f4c5e..731ac643f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -99,6 +99,8 @@ enum class SpecialRegister : u64 { return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: return ir.Imm32(Common::BitCast(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 514de6838..5c1b81638 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -340,6 +340,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: + case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; case IR::Opcode::GetCbufU8: From baec84247fe815199595d9e8077b71f3b5c8317e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 01:48:39 +0200 Subject: [PATCH 165/770] shader: Address Feedback --- .../backend/spirv/emit_context.cpp | 3 +- .../backend/spirv/emit_context.h | 2 - .../backend/spirv/emit_spirv.cpp | 4 +- .../backend/spirv/emit_spirv.h | 12 ++--- .../backend/spirv/emit_spirv_barriers.cpp | 37 ++++++-------- .../spirv/emit_spirv_context_get_set.cpp | 39 ++------------- .../frontend/ir/ir_emitter.cpp | 49 +++++------------- .../frontend/ir/ir_emitter.h | 12 +---- src/shader_recompiler/frontend/ir/modifiers.h | 13 +---- src/shader_recompiler/frontend/ir/opcodes.inc | 12 ++--- .../translate/impl/barrier_operations.cpp | 12 ++--- .../translate/impl/move_special_register.cpp | 2 + .../frontend/maxwell/translate/impl/vote.cpp | 5 +- .../ir_opt/constant_propagation_pass.cpp | 9 ++-- .../global_memory_to_storage_buffer_pass.cpp | 10 ++-- .../ir_opt/ssa_rewrite_pass.cpp | 50 +------------------ 16 files changed, 60 insertions(+), 211 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 794cd4ed8..32f679f2a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -510,7 +510,8 @@ void EmitContext::DefineOutputs(const Info& info) { const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); } - if (info.stores_viewport_index && !ignore_viewport_layer) { + if (info.stores_viewport_index && + (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in Fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 1573c2560..f4715a709 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ 
b/src/shader_recompiler/backend/spirv/emit_context.h @@ -134,8 +134,6 @@ public: std::vector interfaces; - bool ignore_viewport_layer{}; - private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index cc6b98f7e..191380db0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -228,11 +228,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); if (profile.support_viewport_index_layer_non_geometry && - ctx.stage == Shader::Stage::VertexB) { + ctx.stage != Shader::Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } else { - ctx.ignore_viewport_layer = true; } } if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 17a452e0e..5d0f16b3a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -28,7 +28,9 @@ void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitMemoryBarrier(EmitContext& ctx, IR::Inst* inst); +void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx); +void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); +void EmitMemoryBarrierSystemLevel(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); @@ -60,14 +62,6 @@ void EmitSetZFlag(EmitContext& ctx); void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); -void EmitGetFCSMFlag(EmitContext& ctx); -void EmitGetTAFlag(EmitContext& ctx); -void EmitGetTRFlag(EmitContext& ctx); -void EmitGetMXFlag(EmitContext& ctx); -void EmitSetFCSMFlag(EmitContext& ctx); -void EmitSetTAFlag(EmitContext& ctx); -void EmitSetTRFlag(EmitContext& ctx); -void EmitSetMXFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 413ac25a0..18f512319 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -7,34 +7,27 @@ namespace Shader::Backend::SPIRV { namespace { -spv::Scope MemoryScopeToSpirVScope(IR::MemoryScope scope) { - switch (scope) { - case IR::MemoryScope::Warp: - return spv::Scope::Subgroup; - case IR::MemoryScope::Workgroup: - return spv::Scope::Workgroup; - case IR::MemoryScope::Device: - return spv::Scope::Device; - case IR::MemoryScope::System: - return spv::Scope::CrossDevice; - case IR::MemoryScope::DontCare: - return spv::Scope::Invocation; - default: - throw NotImplementedException("Unknown memory scope!"); - } -} - -} // namespace - -void EmitMemoryBarrier(EmitContext& ctx, IR::Inst* inst) { - const auto info{inst->Flags()}; +void EmitMemoryBarrierImpl(EmitContext& ctx, spv::Scope scope) { const auto 
semantics = spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - const auto scope = MemoryScopeToSpirVScope(info.scope); ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast(scope)), ctx.Constant(ctx.U32[1], static_cast(semantics))); } +} // Anonymous namespace + +void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx) { + EmitMemoryBarrierImpl(ctx, spv::Scope::Workgroup); +} + +void EmitMemoryBarrierDeviceLevel(EmitContext& ctx) { + EmitMemoryBarrierImpl(ctx, spv::Scope::Device); +} + +void EmitMemoryBarrierSystemLevel(EmitContext& ctx) { + EmitMemoryBarrierImpl(ctx, spv::Scope::CrossDevice); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f13c0ee72..caab9aa12 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -58,7 +58,10 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::ViewportIndex: - return ctx.ignore_viewport_layer ? std::nullopt : std::optional{ctx.viewport_index}; + return (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) + ? std::optional{ctx.viewport_index} + : std::nullopt; default: throw NotImplementedException("Read attribute {}", attr); } @@ -206,7 +209,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { - auto output = OutputAttrPointer(ctx, attr); + const std::optional output{OutputAttrPointer(ctx, attr)}; if (!output) { return; } @@ -263,38 +266,6 @@ void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitGetFCSMFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitGetTAFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitGetTRFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitGetMXFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSetFCSMFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSetTAFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSetTRFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSetMXFlag(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b5f61956a..5e94edd74 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,8 +82,17 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } -void IREmitter::MemoryBarrier(BarrierInstInfo info) { - Inst(Opcode::MemoryBarrier, Flags{info}); +void IREmitter::MemoryBarrier(MemoryScope scope) { + switch (scope) { + case MemoryScope::Workgroup: + Inst(Opcode::MemoryBarrierWorkgroupLevel); + case 
MemoryScope::Device: + Inst(Opcode::MemoryBarrierDeviceLevel); + case MemoryScope::System: + Inst(Opcode::MemoryBarrierSystemLevel); + default: + throw InvalidArgument("Invalid memory scope {}", scope); + } } void IREmitter::Return() { @@ -202,38 +211,6 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U1 IREmitter::GetFCSMFlag() { - return Inst(Opcode::GetFCSMFlag); -} - -U1 IREmitter::GetTAFlag() { - return Inst(Opcode::GetTAFlag); -} - -U1 IREmitter::GetTRFlag() { - return Inst(Opcode::GetTRFlag); -} - -U1 IREmitter::GetMXFlag() { - return Inst(Opcode::GetMXFlag); -} - -void IREmitter::SetFCSMFlag(const U1& value) { - Inst(Opcode::SetFCSMFlag, value); -} - -void IREmitter::SetTAFlag(const U1& value) { - Inst(Opcode::SetTAFlag, value); -} - -void IREmitter::SetTRFlag(const U1& value) { - Inst(Opcode::SetTRFlag, value); -} - -void IREmitter::SetMXFlag(const U1& value) { - Inst(Opcode::SetMXFlag, value); -} - static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { case FlowTest::F: @@ -292,9 +269,9 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); - case FlowTest::FCSM_TR: - return ir.LogicalAnd(ir.GetFCSMFlag(), ir.GetTRFlag()); + // LOG_WARNING(ShaderDecompiler, "FCSM_TR CC State (Stubbed)"); + return ir.Imm1(false); case FlowTest::CSM_TA: case FlowTest::CSM_TR: case FlowTest::CSM_MX: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e034d672f..14b743975 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -70,16 +70,6 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); - [[nodiscard]] U1 GetFCSMFlag(); - [[nodiscard]] U1 GetTAFlag(); - [[nodiscard]] U1 GetTRFlag(); - [[nodiscard]] U1 GetMXFlag(); - - void SetFCSMFlag(const U1& value); - void SetTAFlag(const U1& value); - void SetTRFlag(const U1& value); - void SetMXFlag(const U1& value); - [[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 GetFlowTestResult(FlowTest test); @@ -138,7 +128,7 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); - [[nodiscard]] void MemoryBarrier(BarrierInstInfo info); + [[nodiscard]] void MemoryBarrier(MemoryScope scope); template [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 7730c25a9..2aa4ac79b 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,13 +25,7 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { - DontCare, - Warp, - Workgroup, - Device, - System -}; +enum class MemoryScope : u32 { DontCare, Warp, Workgroup, Device, System }; struct FpControl { bool no_contraction{false}; @@ -40,11 +34,6 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); -union BarrierInstInfo { - u32 raw; - BitField<0, 3, MemoryScope> scope; -}; - union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 74e956930..3640a5d24 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ 
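[Note] As transcribed above, the reworked IREmitter::MemoryBarrier has no break or return between its cases, so a Workgroup barrier would fall through and emit the Device and System barriers too before hitting the throwing default. Each case presumably ends with a return, as in this corrected sketch (note also that [[nodiscard]] has no effect on the void-returning declaration):

    void IREmitter::MemoryBarrier(MemoryScope scope) {
        switch (scope) {
        case MemoryScope::Workgroup:
            Inst(Opcode::MemoryBarrierWorkgroupLevel);
            return;
        case MemoryScope::Device:
            Inst(Opcode::MemoryBarrierDeviceLevel);
            return;
        case MemoryScope::System:
            Inst(Opcode::MemoryBarrierSystemLevel);
            return;
        default:
            throw InvalidArgument("Invalid memory scope {}", scope);
        }
    }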
b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -17,7 +17,9 @@ OPCODE(Unreachable, Void, OPCODE(DemoteToHelperInvocation, Void, Label, ) // Barriers -OPCODE(MemoryBarrier, Void, ) +OPCODE(MemoryBarrierWorkgroupLevel, Void, ) +OPCODE(MemoryBarrierDeviceLevel, Void, ) +OPCODE(MemoryBarrierSystemLevel, Void, ) // Special operations OPCODE(Prologue, Void, ) @@ -49,18 +51,10 @@ OPCODE(GetZFlag, U1, Void OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) OPCODE(GetOFlag, U1, Void, ) -OPCODE(GetFCSMFlag, U1, Void, ) -OPCODE(GetTAFlag, U1, Void, ) -OPCODE(GetTRFlag, U1, Void, ) -OPCODE(GetMXFlag, U1, Void, ) OPCODE(SetZFlag, Void, U1, ) OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) -OPCODE(SetFCSMFlag, Void, U1, ) -OPCODE(SetTAFlag, Void, U1, ) -OPCODE(SetTRFlag, Void, U1, ) -OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(LaneId, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 933af572c..26d5e276b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -5,8 +5,8 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { @@ -21,28 +21,24 @@ enum class LocalScope : u64 { IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { switch (scope) { case LocalScope::CTG: - return IR::MemoryScope::Warp; + return IR::MemoryScope::Workgroup; case LocalScope::GL: return IR::MemoryScope::Device; case LocalScope::SYS: return IR::MemoryScope::System; - case LocalScope::VC: - return IR::MemoryScope::Workgroup; // or should be device? 
default: throw NotImplementedException("Unimplemented Local Scope {}", scope); } } -} // namespace +} // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; - IR::BarrierInstInfo info{}; - info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); - ir.MemoryBarrier(info); + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } void TranslatorVisitor::DEPBAR() { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 731ac643f..7d9c42a83 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -96,8 +96,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 2acabb662..d508e1e23 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,10 +50,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING("VOTE.VTG: Stubbed!"); - auto imm = ir.Imm1(false); - ir.SetFCSMFlag(imm); - ir.SetTRFlag(imm); + // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 8999c3a3d..1720d7a09 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -374,17 +374,14 @@ std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opco if (inst->Opcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { return std::nullopt; } - IR::Value value_index{inst->Arg(2)}; if (!value_index.IsImmediate()) { return std::nullopt; } - - const u32 second_index = value_index.U32(); + const u32 second_index{value_index.U32()}; if (first_index != second_index) { IR::Value value_composite{inst->Arg(0)}; if (value_composite.IsImmediate()) { @@ -404,8 +401,8 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser if (!value_2.IsImmediate()) { return; } - const u32 first_index = value_2.U32(); - auto result = FoldCompositeExtractImpl(value_1, insert, construct, first_index); + const u32 first_index{value_2.U32()}; + const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; if (!result) { return; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index d4bae249b..8876a5c33 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -4,9 +4,9 @@ #include #include +#include #include 
#include -#include #include #include @@ -295,12 +295,12 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction - const bool is_a_write = IsGlobalMemoryWrite(inst); - auto it = writes_map.find(*storage_buffer); + const bool is_a_write{IsGlobalMemoryWrite(inst)}; + auto it{writes_map.find(*storage_buffer)}; if (it == writes_map.end()) { - writes_map[*storage_buffer] = is_a_write; + writes_map[*storage_buffer] = is_a_write; } else { - it->second = it->second || is_a_write; + it->second = it->second || is_a_write; } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7dab33034..72d4abb77 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -38,10 +38,6 @@ struct ZeroFlagTag : FlagTag {}; struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; -struct FCSMFlagTag : FlagTag {}; -struct TAFlagTag : FlagTag {}; -struct TRFlagTag : FlagTag {}; -struct MXFlagTag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -57,8 +53,7 @@ struct IndirectBranchVariable { }; using Variant = std::variant; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -94,22 +89,6 @@ struct DefTable { return overflow_flag; } - [[nodiscard]] ValueMap& operator[](FCSMFlagTag) noexcept { - return fcsm_flag; - } - - [[nodiscard]] ValueMap& operator[](TAFlagTag) noexcept { - return ta_flag; - } - - [[nodiscard]] ValueMap& operator[](TRFlagTag) noexcept { - return tr_flag; - } - - [[nodiscard]] ValueMap& operator[](MXFlagTag) noexcept { - return mr_flag; - } - std::array regs; std::array preds; boost::container::flat_map goto_vars; @@ -118,10 +97,6 @@ struct DefTable { ValueMap sign_flag; ValueMap carry_flag; ValueMap overflow_flag; - ValueMap fcsm_flag; - ValueMap ta_flag; - ValueMap tr_flag; - ValueMap mr_flag; }; IR::Opcode UndefOpcode(IR::Reg) noexcept { @@ -272,18 +247,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetOFlag: pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; - case IR::Opcode::SetFCSMFlag: - pass.WriteVariable(FCSMFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetTAFlag: - pass.WriteVariable(TAFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetTRFlag: - pass.WriteVariable(TRFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetMXFlag: - pass.WriteVariable(MXFlagTag{}, block, inst.Arg(0)); - break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); @@ -312,17 +275,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; - case IR::Opcode::GetFCSMFlag: - inst.ReplaceUsesWith(pass.ReadVariable(FCSMFlagTag{}, block)); - break; - case IR::Opcode::GetTAFlag: - inst.ReplaceUsesWith(pass.ReadVariable(TAFlagTag{}, block)); - break; - case IR::Opcode::GetTRFlag: - inst.ReplaceUsesWith(pass.ReadVariable(TRFlagTag{}, block)); - break; - case IR::Opcode::GetMXFlag: - inst.ReplaceUsesWith(pass.ReadVariable(MXFlagTag{}, block)); break; default: break; From 480dc0d5e68fd1c79345e93216013a1d2e172c70 Mon Sep 
17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:27:25 +0200 Subject: [PATCH 166/770] vk_pipeline_cache: Small fixes to the pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1f308eec2..3111165fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,11 +539,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } else { @@ -558,11 +560,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } From 80df541a0860eecc599f60a7b2955e1e286bc48a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:28:44 +0200 Subject: [PATCH 167/770] shader: "Implement" NOP --- .../frontend/maxwell/translate/impl/not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 80a6ed578..acabb0118 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -183,7 +183,7 @@ void TranslatorVisitor::LONGJMP(u64) { } void TranslatorVisitor::NOP(u64) { - ThrowNotImplemented(Opcode::NOP); + // NOP is No-Op. 
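+    // (Intentionally empty: NOP has no architectural effect beyond consuming
+    // an issue slot, so emitting no IR is the complete translation.)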
} void TranslatorVisitor::OUT_reg(u64) { From ed6a1b1a3def4b8ed8c8fd1a7774a0a14edefc70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:34:07 +0200 Subject: [PATCH 168/770] shader: Address feedback --- src/shader_recompiler/frontend/ir/modifiers.h | 8 +++++++- .../global_memory_to_storage_buffer_pass.cpp | 20 +++++++++---------- .../ir_opt/ssa_rewrite_pass.cpp | 1 - 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 2aa4ac79b..461671326 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,7 +25,13 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { DontCare, Warp, Workgroup, Device, System }; +enum class MemoryScope : u32 { + DontCare, + Warp, + Workgroup, + Device, + System, +}; struct FpControl { bool no_contraction{false}; diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 8876a5c33..c8bd7b329 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -46,7 +46,9 @@ using StorageBufferSet = using StorageInstVector = boost::container::small_vector; using VisitedBlocks = boost::container::flat_set, boost::container::small_vector>; -using StorageWritesMap = std::map; +using StorageWritesSet = + boost::container::flat_set, + boost::container::small_vector>; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -266,7 +268,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, /// Collects the storage buffer used by a global memory instruction and the instruction itself void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace, StorageWritesMap& writes_map) { + StorageInstVector& to_replace, StorageWritesSet& writes_set) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -295,12 +297,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction - const bool is_a_write{IsGlobalMemoryWrite(inst)}; - auto it{writes_map.find(*storage_buffer)}; - if (it == writes_map.end()) { - writes_map[*storage_buffer] = is_a_write; - } else { - it->second = it->second || is_a_write; + if (IsGlobalMemoryWrite(inst)) { + writes_set.insert(*storage_buffer); } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ @@ -375,14 +373,14 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - StorageWritesMap writes_map; + StorageWritesSet writes_set; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_map); + CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set); } } Info& info{program.info}; @@ -392,7 +390,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { 
.cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, - .is_written{writes_map[storage_buffer]}, + .is_written{writes_set.contains(storage_buffer)}, }); ++storage_index; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 72d4abb77..259233746 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -275,7 +275,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetOFlag: inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; - break; default: break; } From e7700aad183047f0c6fa990e1f424448d00c8865 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 03:01:12 +0200 Subject: [PATCH 169/770] shader: Fix undetected bug from review --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5e94edd74..dbfc670b0 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -86,10 +86,13 @@ void IREmitter::MemoryBarrier(MemoryScope scope) { switch (scope) { case MemoryScope::Workgroup: Inst(Opcode::MemoryBarrierWorkgroupLevel); + break; case MemoryScope::Device: Inst(Opcode::MemoryBarrierDeviceLevel); + break; case MemoryScope::System: Inst(Opcode::MemoryBarrierSystemLevel); + break; default: throw InvalidArgument("Invalid memory scope {}", scope); } From ca7ebdc471cfd9549b15f8ae5523c6fdddca57e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:16:09 -0300 Subject: [PATCH 170/770] shader: Fix FADD32I --- .../maxwell/translate/impl/floating_point_add.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 487198aa6..b39950c84 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -68,17 +68,15 @@ void TranslatorVisitor::FADD32I(u64 insn) { union { u64 raw; BitField<55, 1, u64> ftz; - BitField<53, 1, u64> neg_b; + BitField<56, 1, u64> neg_a; BitField<54, 1, u64> abs_a; BitField<52, 1, u64> cc; - BitField<56, 1, u64> neg_a; + BitField<53, 1, u64> neg_b; BitField<57, 1, u64> abs_b; - BitField<50, 1, u64> sat; } const fadd32i{insn}; - FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, - GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, - fadd32i.neg_b != 0); + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); } } // namespace Shader::Maxwell From c4aab5c40ec1347da9811169bbc3dfb23632ab98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:17:08 -0300 Subject: [PATCH 171/770] shader: Fix fp16 merge when using native fp16 --- .../maxwell/translate/impl/half_floating_point_helper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index 
d0c6ba1aa..0dbeb7f56 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -51,9 +51,9 @@ IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const I case Merge::MRG_H0: case Merge::MRG_H1: { const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); } } throw InvalidArgument("Invalid merge {}", merge); From 9a342f5605aef385612053d7d8b564f541952eae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:18:12 -0300 Subject: [PATCH 172/770] shader: Rework global memory tracking to use breadth-first search --- .../global_memory_to_storage_buffer_pass.cpp | 151 ++++++++++-------- 1 file changed, 81 insertions(+), 70 deletions(-) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index c8bd7b329..f94c82e21 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -4,9 +4,9 @@ #include #include -#include #include #include +#include #include #include @@ -40,15 +40,19 @@ struct Bias { u32 offset_end; }; +using boost::container::flat_set; +using boost::container::small_vector; using StorageBufferSet = - boost::container::flat_set, - boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; -using VisitedBlocks = boost::container::flat_set, - boost::container::small_vector>; + flat_set, small_vector>; +using StorageInstVector = small_vector; using StorageWritesSet = - boost::container::flat_set, - boost::container::small_vector>; + flat_set, small_vector>; + +struct StorageInfo { + StorageBufferSet set; + StorageInstVector to_replace; + StorageWritesSet writes; +}; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -215,60 +219,72 @@ std::optional TrackLowAddress(IR::Inst* inst) { }; } -/// Recursively tries to track the storage buffer address used by a global memory instruction -std::optional Track(IR::Block* block, const IR::Value& value, const Bias* bias, - VisitedBlocks& visited) { - if (value.IsImmediate()) { - // Immediates can't be a storage buffer +/// Tries to get the storage buffer out of a constant buffer read instruction +std::optional TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) { + if (inst->Opcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } - const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbufU32) { - const IR::Value index{inst->Arg(0)}; - const IR::Value offset{inst->Arg(1)}; - if (!index.IsImmediate()) { - // Definitely not a storage buffer if it's read from a non-immediate index - return std::nullopt; - } - if (!offset.IsImmediate()) { - // TODO: Support SSBO arrays - return std::nullopt; - } - const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, - }; - if (bias && !MeetsBias(storage_buffer, *bias)) { - // We have to blacklist some addresses in case we wrongly point to them 
- return std::nullopt; - } - return storage_buffer; + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Definitely not a storage buffer if it's read from a non-immediate index + return std::nullopt; } - // Reversed loops are more likely to find the right result - for (size_t arg = inst->NumArgs(); arg--;) { - IR::Block* inst_block{block}; - if (inst->Opcode() == IR::Opcode::Phi) { - // If we are going through a phi node, mark the current block as visited - visited.insert(block); - // and skip already visited blocks to avoid looping forever - IR::Block* const phi_block{inst->PhiBlock(arg)}; - if (visited.contains(phi_block)) { - // Already visited, skip + if (!offset.IsImmediate()) { + // TODO: Support SSBO arrays + return std::nullopt; + } + const StorageBufferAddr storage_buffer{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + if (bias && !MeetsBias(storage_buffer, *bias)) { + // We have to blacklist some addresses in case we wrongly point to them + return std::nullopt; + } + return storage_buffer; +} + +/// Tries to track the storage buffer address used by a global memory instruction +std::optional Track(const IR::Value& value, const Bias* bias) { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + // Breadth-first search visiting the right most arguments first + // Small vector has been determined from shaders in Super Smash Bros. Ultimate + small_vector visited; + std::queue queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const IR::Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = TryGetStorageBuffer(inst, bias)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const IR::Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { continue; } - inst_block = phi_block; - } - const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; + // Queue instruction if it hasn't been visited + const IR::Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } } } + // SSA tree has been traversed and the origin hasn't been found return std::nullopt; } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace, StorageWritesSet& writes_set) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -284,24 +300,23 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - VisitedBlocks visited_blocks; - std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias - visited_blocks.clear(); - storage_buffer = Track(&block, low_addr, nullptr, visited_blocks); + storage_buffer = 
Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also failed, drop the global memory usage
+            // LOG_ERROR
             DiscardGlobalMemory(block, inst);
             return;
         }
     }
     // Collect storage buffer and the instruction
     if (IsGlobalMemoryWrite(inst)) {
-        writes_set.insert(*storage_buffer);
+        info.writes.insert(*storage_buffer);
     }
-    storage_buffer_set.insert(*storage_buffer);
-    to_replace.push_back(StorageInst{
+    info.set.insert(*storage_buffer);
+    info.to_replace.push_back(StorageInst{
         .storage_buffer{*storage_buffer},
         .inst{&inst},
         .block{&block},
@@ -371,33 +386,29 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 } // Anonymous namespace
 
 void GlobalMemoryToStorageBufferPass(IR::Program& program) {
-    StorageBufferSet storage_buffers;
-    StorageInstVector to_replace;
-    StorageWritesSet writes_set;
-
+    StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
             if (!IsGlobalMemory(inst)) {
                 continue;
             }
-            CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set);
+            CollectStorageBuffers(*block, inst, info);
         }
     }
-    Info& info{program.info};
     u32 storage_index{};
-    for (const StorageBufferAddr& storage_buffer : storage_buffers) {
-        info.storage_buffers_descriptors.push_back({
+    for (const StorageBufferAddr& storage_buffer : info.set) {
+        program.info.storage_buffers_descriptors.push_back({
             .cbuf_index{storage_buffer.index},
             .cbuf_offset{storage_buffer.offset},
             .count{1},
-            .is_written{writes_set.contains(storage_buffer)},
+            .is_written{info.writes.contains(storage_buffer)},
         });
         ++storage_index;
     }
-    for (const StorageInst& storage_inst : to_replace) {
+    for (const StorageInst& storage_inst : info.to_replace) {
         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
-        const auto it{storage_buffers.find(storage_inst.storage_buffer)};
-        const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const auto it{info.set.find(storage_inst.storage_buffer)};
+        const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
         const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};

From 6ff2e9ba097f3619c21d2e547570e1fbaae8d6ee Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 3 Apr 2021 05:19:13 -0300
Subject: [PATCH 173/770] vk_pipeline_cache: Remove unnecessary scope in
 pipeline cache locking

---
 .../renderer_vulkan/vk_pipeline_cache.cpp | 27 +++++++++----------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3111165fb..f88ab67ae 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -539,13 +539,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
             ShaderPools pools;
             auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
-            {
-                std::lock_guard lock{state.mutex};
-                compute_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
+            std::lock_guard lock{state.mutex};
+            compute_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
             }
         });
     } else {
@@ -560,13 +558,11 @@ void PipelineCache::LoadDiskResources(u64 title_id,
std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - { - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } @@ -635,7 +631,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, - .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, From 0b26f2b90ea4fe6097d982b72dfe38c0a3658ad0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 06:40:16 -0300 Subject: [PATCH 174/770] shader: Remove unused header in VOTE --- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index d508e1e23..0793611ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include - #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" From 5ed68e83db39e1f6790a625529f10f4e1d5a8f89 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 21:41:49 -0300 Subject: [PATCH 175/770] shader: Remove atomic flags and use mutex + cond variable for pipelines --- .../renderer_vulkan/vk_compute_pipeline.cpp | 15 ++++++++++----- .../renderer_vulkan/vk_compute_pipeline.h | 7 ++++++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++++++++---- .../renderer_vulkan/vk_graphics_pipeline.h | 7 ++++++- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6707842ab..0bb5b852d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -55,8 +55,9 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip .basePipelineHandle = 0, .basePipelineIndex = 0, }); - building_flag.test_and_set(); - building_flag.notify_all(); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -75,7 +76,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); @@ -112,9 +114,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, update_descriptor_queue, image_index); - if (!building_flag.test()) { + if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } scheduler.Record([this](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 02da504f7..104e6cc85 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include "common/common_types.h" @@ -47,7 +49,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e8c3a5624..67de3cb79 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,8 +135,10 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, 
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); - building_flag.test_and_set(); - building_flag.notify_all(); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); @@ -196,8 +198,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); - if (!building_flag.test()) { - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } if (scheduler.UpdateGraphicsPipeline(this)) { scheduler.Record([this](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4e0583157..7d14d2378 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -63,7 +65,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan From 5b3c6d59c2c92ac388530740f8008f1b9764c14d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 22:28:07 -0300 Subject: [PATCH 176/770] vk_compute_pass: Fix compute passes --- .../renderer_vulkan/vk_compute_pass.cpp | 39 ++++++++----------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 + .../renderer_vulkan/vk_rasterizer.cpp | 1 - 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 760857839..2cfe9d4bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,27 +206,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); - /* - FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); - */ } VKComputePass::~VKComputePass() = default; @@ -262,8 +258,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = 
*pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -271,8 +266,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -319,8 +314,8 @@ std::pair QuadIndexedPass::Assemble( const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -329,9 +324,9 @@ std::pair QuadIndexedPass::Assemble( .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 67de3cb79..a0ef0e98b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -189,6 +189,8 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.BindHostGeometryBuffers(is_indexed); + update_descriptor_queue.Acquire(); + size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0bd4b8af..0292a1b94 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -172,7 +172,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { if (!pipeline) { return; } - update_descriptor_queue.Acquire(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); From 3f594dd86bd1ee1b178109132482c7d6b43e66dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp 
Date: Sun, 4 Apr 2021 02:31:09 -0300 Subject: [PATCH 177/770] shader: Reimplement GetCbufU64 as GetCbufU32x2 It may generate better code on some compilers and it's easier to handle. --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++-- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 +++--- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++-- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- .../maxwell/translate/impl/load_constant.cpp | 12 ++++++------ .../ir_opt/collect_shader_info_pass.cpp | 7 +++---- 9 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32f679f2a..e70b78a28 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -308,8 +308,8 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (True(info.used_constant_buffer_types & IR::Type::F32)) { DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); } - if (True(info.used_constant_buffer_types & IR::Type::U64)) { - DefineConstantBuffers(info, &UniformDefinitions::U64, binding, U64, 'u', sizeof(u64)); + if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { + DefineConstantBuffers(info, &UniformDefinitions::U32x2, binding, U32[2], 'u', sizeof(u64)); } for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index f4715a709..3a686a78c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -42,7 +42,7 @@ struct UniformDefinitions { Id S16{}; Id U32{}; Id F32{}; - Id U64{}; + Id U32x2{}; }; class EmitContext final : public Sirit::Module { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5d0f16b3a..e066ba87d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -47,7 +47,7 @@ Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitGetAttributeIndexed(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index caab9aa12..1bfc60294 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -153,8 +153,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o return GetCbuf(ctx, ctx.F32[1], 
&UniformDefinitions::F32, sizeof(f32), binding, offset); } -Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index dbfc670b0..dbd38a28b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -162,8 +162,8 @@ U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { return Inst(Opcode::GetCbufU32, binding, byte_offset); } -UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, - bool is_signed) { +Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { switch (bitsize) { case 8: return Inst(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); @@ -172,7 +172,7 @@ UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsi case 32: return Inst(Opcode::GetCbufU32, binding, byte_offset); case 64: - return Inst(Opcode::GetCbufU64, binding, byte_offset); + return Inst(Opcode::GetCbufU32x2, binding, byte_offset); default: throw InvalidArgument("Invalid bit size {}", bitsize); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 14b743975..81a57fefe 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -56,8 +56,8 @@ public: void SetIndirectBranchVariable(const U32& value); [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); - [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, - bool is_signed); + [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 3640a5d24..734f5328b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -40,7 +40,7 @@ OPCODE(GetCbufU16, U32, U32, OPCODE(GetCbufS16, U32, U32, U32, ) OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) -OPCODE(GetCbufU64, U64, U32, U32, ) +OPCODE(GetCbufU32x2, U32x2, U32, U32, ) OPCODE(GetAttribute, F32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, F32, ) OPCODE(GetAttributeIndexed, F32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 49ccb7d62..ae3ecea32 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -30,25 +30,25 @@ void TranslatorVisitor::LDC(u64 insn) { const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; switch (ldc.size) { case Size::U8: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); break; case Size::S8: - 
X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); break; case Size::U16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); break; case Size::S16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); break; case Size::B32: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); break; case Size::B64: { if (!IR::IsAligned(ldc.dest_reg, 2)) { throw NotImplementedException("Unaligned destination register"); } - const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5c1b81638..07f031ea6 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -271,7 +271,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { - case IR::Opcode::GetCbufU64: case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -349,7 +348,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufS16: case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: - case IR::Opcode::GetCbufU64: { + case IR::Opcode::GetCbufU32x2: { if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { AddConstantBufferDescriptor(info, index.U32(), 1); } else { @@ -370,8 +369,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= IR::Type::F32; break; - case IR::Opcode::GetCbufU64: - info.used_constant_buffer_types |= IR::Type::U64; + case IR::Opcode::GetCbufU32x2: + info.used_constant_buffer_types |= IR::Type::U32x2; break; default: break; From 85795de99f27e57ddf97696e7915ddd4bdf02976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:00:41 -0300 Subject: [PATCH 178/770] shader: Abstract breadth searches and use the abstraction --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/ir/breadth_first_search.h | 57 +++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 84 ++++++------------- src/shader_recompiler/ir_opt/texture_pass.cpp | 68 +++++---------- 4 files changed, 106 insertions(+), 104 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/breadth_first_search.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 181eac9f2..700b17113 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -27,6 +27,7 @@ add_library(shader_recompiler STATIC frontend/ir/attribute.h frontend/ir/basic_block.cpp frontend/ir/basic_block.h + frontend/ir/breadth_first_search.h frontend/ir/condition.cpp frontend/ir/condition.h frontend/ir/flow_test.cpp diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..b35f062d4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt 
file included.
+
+#pragma once
+
+#include <optional>
+#include <queue>
+#include <type_traits>
+
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+template <typename Pred>
+auto BreadthFirstSearch(const Value& value, Pred&& pred)
+    -> std::invoke_result_t<Pred, const Inst*> {
+    if (value.IsImmediate()) {
+        // Nothing to do with immediates
+        return std::nullopt;
+    }
+    // Breadth-first search visiting the right most arguments first
+    // Small vector has been determined from shaders in Super Smash Bros. Ultimate
+    boost::container::small_vector<const Inst*, 2> visited;
+    std::queue<const Inst*> queue;
+    queue.push(value.InstRecursive());
+
+    while (!queue.empty()) {
+        // Pop one instruction from the queue
+        const Inst* const inst{queue.front()};
+        queue.pop();
+        if (const std::optional result = pred(inst)) {
+            // This is the instruction we were looking for
+            return result;
+        }
+        // Visit the right most arguments first
+        for (size_t arg = inst->NumArgs(); arg--;) {
+            const Value arg_value{inst->Arg(arg)};
+            if (arg_value.IsImmediate()) {
+                continue;
+            }
+            // Queue instruction if it hasn't been visited
+            const Inst* const arg_inst{arg_value.InstRecursive()};
+            if (std::ranges::find(visited, arg_inst) == visited.end()) {
+                visited.push_back(arg_inst);
+                queue.push(arg_inst);
+            }
+        }
+    }
+    // SSA tree has been traversed and the result hasn't been found
+    return std::nullopt;
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index f94c82e21..0858a0bdd 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -12,6 +12,7 @@
 #include <boost/container/small_vector.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/ir_opt/passes.h"
@@ -219,68 +220,35 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     };
 }
 
-/// Tries to get the storage buffer out of a constant buffer read instruction
-std::optional<StorageBufferAddr> TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) {
-    if (inst->Opcode() != IR::Opcode::GetCbufU32) {
-        return std::nullopt;
-    }
-    const IR::Value index{inst->Arg(0)};
-    const IR::Value offset{inst->Arg(1)};
-    if (!index.IsImmediate()) {
-        // Definitely not a storage buffer if it's read from a non-immediate index
-        return std::nullopt;
-    }
-    if (!offset.IsImmediate()) {
-        // TODO: Support SSBO arrays
-        return std::nullopt;
-    }
-    const StorageBufferAddr storage_buffer{
-        .index{index.U32()},
-        .offset{offset.U32()},
-    };
-    if (bias && !MeetsBias(storage_buffer, *bias)) {
-        // We have to blacklist some addresses in case we wrongly point to them
-        return std::nullopt;
-    }
-    return storage_buffer;
-}
-
-/// Tries to track the storage buffer address used by a global memory instruction
 std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
-    if (value.IsImmediate()) {
-        // Nothing to do with immediates
-        return std::nullopt;
-    }
-    // Breadth-first search visiting the right most arguments first
-    // Small vector has been determined from shaders in Super Smash Bros. Ultimate
-    small_vector<const IR::Inst*, 2> visited;
-    std::queue<const IR::Inst*> queue;
-    queue.push(value.InstRecursive());
-
-    while (!queue.empty()) {
-        // Pop one instruction from the queue
-        const IR::Inst* const inst{queue.front()};
-        queue.pop();
-        if (const std::optional result = TryGetStorageBuffer(inst, bias)) {
-            // This is the instruction we were looking for
-            return result;
+    const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
+        if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+            return std::nullopt;
         }
-        // Visit the right most arguments first
-        for (size_t arg = inst->NumArgs(); arg--;) {
-            const IR::Value arg_value{inst->Arg(arg)};
-            if (arg_value.IsImmediate()) {
-                continue;
-            }
-            // Queue instruction if it hasn't been visited
-            const IR::Inst* const arg_inst{arg_value.InstRecursive()};
-            if (std::ranges::find(visited, arg_inst) == visited.end()) {
-                visited.push_back(arg_inst);
-                queue.push(arg_inst);
-            }
+        const IR::Value index{inst->Arg(0)};
+        const IR::Value offset{inst->Arg(1)};
+        if (!index.IsImmediate()) {
+            // Definitely not a storage buffer if it's read from a
+            // non-immediate index
+            return std::nullopt;
         }
-    }
-    // SSA tree has been traversed and the origin hasn't been found
-    return std::nullopt;
+        if (!offset.IsImmediate()) {
+            // TODO: Support SSBO arrays
+            return std::nullopt;
+        }
+        const StorageBufferAddr storage_buffer{
+            .index{index.U32()},
+            .offset{offset.U32()},
+        };
+        if (bias && !MeetsBias(storage_buffer, *bias)) {
+            // We have to blacklist some addresses in case we wrongly
+            // point to them
+            return std::nullopt;
+        }
+        return storage_buffer;
+    }};
+    return BreadthFirstSearch(value, pred);
 }
 
 /// Collects the storage buffer used by a global memory instruction and the instruction itself
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index da8977b76..bcb94ce4d 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -2,13 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
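// The texture pass below is the second user of the new helper: its predicate,
// TryGetConstBuffer, yields a std::optional<ConstBufferAddr>, so the whole
// constant buffer search collapses to a single call, roughly:
//
//     std::optional<ConstBufferAddr> Track(const IR::Value& value) {
//         return IR::BreadthFirstSearch(value, TryGetConstBuffer);
//     }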
+#include #include -#include #include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" @@ -28,9 +29,6 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; -using VisitedBlocks = boost::container::flat_set, - boost::container::small_vector>; - IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -101,57 +99,35 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } -std::optional Track(IR::Block* block, const IR::Value& value, - VisitedBlocks& visited) { - if (value.IsImmediate()) { - // Immediates can't be a storage buffer +std::optional TryGetConstBuffer(const IR::Inst* inst) { + if (inst->Opcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } - const IR::Inst* const inst{value.InstRecursive()}; - if (inst->Opcode() == IR::Opcode::GetCbufU32) { - const IR::Value index{inst->Arg(0)}; - const IR::Value offset{inst->Arg(1)}; - if (!index.IsImmediate()) { - // Reading a bindless texture from variable indices is valid - // but not supported here at the moment - return std::nullopt; - } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures - return std::nullopt; - } - return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, - }; + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; } - // Reversed loops are more likely to find the right result - for (size_t arg = inst->NumArgs(); arg--;) { - IR::Block* inst_block{block}; - if (inst->Opcode() == IR::Opcode::Phi) { - // If we are going through a phi node, mark the current block as visited - visited.insert(block); - // and skip already visited blocks to avoid looping forever - IR::Block* const phi_block{inst->PhiBlock(arg)}; - if (visited.contains(phi_block)) { - // Already visited, skip - continue; - } - inst_block = phi_block; - } - const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; - if (storage_buffer) { - return *storage_buffer; - } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; } - return std::nullopt; + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; +} + +std::optional Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); } TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { - VisitedBlocks visited; - const std::optional track_addr{Track(block, inst.Arg(0), visited)}; + const std::optional track_addr{Track(inst.Arg(0))}; if (!track_addr) { throw NotImplementedException("Failed to track bindless texture constant buffer"); } From fc93bc2abde0b54a0a495f9b28a76fd34b47f320 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:04:48 -0300 Subject: [PATCH 179/770] shader: Implement BAR and fix memory barriers --- .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 15 ++++- .../frontend/ir/ir_emitter.cpp | 4 ++ .../frontend/ir/ir_emitter.h | 1 + 
.../frontend/ir/microinstruction.cpp | 4 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../translate/impl/barrier_operations.cpp | 58 ++++++++++++++++++- 7 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index e066ba87d..032b0b2f9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -28,6 +28,7 @@ void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitBarrier(EmitContext& ctx); void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx); void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); void EmitMemoryBarrierSystemLevel(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 18f512319..74f523d0f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -8,16 +8,25 @@ namespace Shader::Backend::SPIRV { namespace { void EmitMemoryBarrierImpl(EmitContext& ctx, spv::Scope scope) { - const auto semantics = + const auto semantics{ spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | - spv::MemorySemanticsMask::ImageMemory; + spv::MemorySemanticsMask::ImageMemory}; ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast(scope)), ctx.Constant(ctx.U32[1], static_cast(semantics))); } - } // Anonymous namespace +void EmitBarrier(EmitContext& ctx) { + const auto execution{spv::Scope::Workgroup}; + const auto memory{spv::Scope::Workgroup}; + const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | + spv::MemorySemanticsMask::WorkgroupMemory}; + ctx.OpControlBarrier(ctx.Constant(ctx.U32[1], static_cast(execution)), + ctx.Constant(ctx.U32[1], static_cast(memory)), + ctx.Constant(ctx.U32[1], static_cast(memory_semantics))); +} + void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx) { EmitMemoryBarrierImpl(ctx, spv::Scope::Workgroup); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index dbd38a28b..246c3b9ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,10 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } +void IREmitter::Barrier() { + Inst(Opcode::Barrier); +} + void IREmitter::MemoryBarrier(MemoryScope scope) { switch (scope) { case MemoryScope::Workgroup: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 81a57fefe..1b00c548d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -128,6 +128,7 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); + [[nodiscard]] void Barrier(); [[nodiscard]] void MemoryBarrier(MemoryScope scope); template diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 074c71d53..481202d94 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ 
b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -57,6 +57,10 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Return: case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: + case Opcode::Barrier: + case Opcode::MemoryBarrierWorkgroupLevel: + case Opcode::MemoryBarrierDeviceLevel: + case Opcode::MemoryBarrierSystemLevel: case Opcode::Prologue: case Opcode::Epilogue: case Opcode::SetAttribute: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 734f5328b..dcd54bcf7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -17,6 +17,7 @@ OPCODE(Unreachable, Void, OPCODE(DemoteToHelperInvocation, Void, Label, ) // Barriers +OPCODE(Barrier, Void, ) OPCODE(MemoryBarrierWorkgroupLevel, Void, ) OPCODE(MemoryBarrierDeviceLevel, Void, ) OPCODE(MemoryBarrierSystemLevel, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 26d5e276b..2a2a294df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -38,6 +38,7 @@ void TranslatorVisitor::MEMBAR(u64 inst) { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } @@ -45,8 +46,61 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::BAR(u64) { - throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); } } // namespace Shader::Maxwell From da6cf2632cd4dc0d2b0278353fcaee0789b418c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:17:17 -0300 Subject: [PATCH 180/770] shader: Add subgroup masks --- .../backend/spirv/emit_context.cpp | 10 +- .../backend/spirv/emit_context.h | 5 + .../backend/spirv/emit_spirv.h | 5 + .../backend/spirv/emit_spirv_warp.cpp | 46 ++++++-- .../frontend/ir/ir_emitter.cpp | 20 ++++ .../frontend/ir/ir_emitter.h | 5 + 
src/shader_recompiler/frontend/ir/opcodes.inc | 5 + .../translate/impl/move_special_register.cpp | 110 ++++++++++++------ .../ir_opt/collect_shader_info_pass.cpp | 7 ++ src/shader_recompiler/shader_info.h | 1 + 10 files changed, 169 insertions(+), 45 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index e70b78a28..5ef637fe7 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -390,8 +390,16 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } + if (info.uses_subgroup_mask) { + subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGeMaskKHR); + } if (info.uses_subgroup_invocation_id || - (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { + (profile.warp_size_potentially_larger_than_guest && + (info.uses_subgroup_vote || info.uses_subgroup_mask))) { subgroup_local_invocation_id = DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 3a686a78c..03c5a6aba 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -97,6 +97,11 @@ public: Id workgroup_id{}; Id local_invocation_id{}; Id subgroup_local_invocation_id{}; + Id subgroup_mask_eq{}; + Id subgroup_mask_lt{}; + Id subgroup_mask_le{}; + Id subgroup_mask_gt{}; + Id subgroup_mask_ge{}; Id instance_id{}; Id instance_index{}; Id base_instance{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 032b0b2f9..712c5e61f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -401,6 +401,11 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask); Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index cbc5b1c96..c57bd291d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -6,10 +6,18 @@ namespace Shader::Backend::SPIRV { namespace { -Id LargeWarpBallot(EmitContext& ctx, Id ballot) { +Id WarpExtract(EmitContext& ctx, Id value) { const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; - return ctx.OpVectorExtractDynamic(ctx.U32[1], 
ballot, local_index);
+    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+}
+
+Id LoadMask(EmitContext& ctx, Id mask) {
+    const Id value{ctx.OpLoad(ctx.U32[4], mask)};
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
+    }
+    return WarpExtract(ctx, value);
 }
 void SetInBoundsFlag(IR::Inst* inst, Id result) {
@@ -47,8 +55,8 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
 return ctx.OpSubgroupAllKHR(ctx.U1, pred);
 }
 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id active_mask{WarpExtract(ctx, mask_ballot)};
+    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
 return ctx.OpIEqual(ctx.U1, lhs, active_mask);
 }
@@ -58,8 +66,8 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
 return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
 }
 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id active_mask{WarpExtract(ctx, mask_ballot)};
+    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
 return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
 }
@@ -69,8 +77,8 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
 return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
 }
 const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
-    const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
-    const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
+    const Id active_mask{WarpExtract(ctx, mask_ballot)};
+    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
 const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
 return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
 ctx.OpIEqual(ctx.U1, lhs, active_mask));
@@ -81,7 +89,27 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
 return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
 }
-    return LargeWarpBallot(ctx, ballot);
+    return WarpExtract(ctx, ballot);
+}
+
+Id EmitSubgroupEqMask(EmitContext& ctx) {
+    return LoadMask(ctx, ctx.subgroup_mask_eq);
+}
+
+Id EmitSubgroupLtMask(EmitContext& ctx) {
+    return LoadMask(ctx, ctx.subgroup_mask_lt);
+}
+
+Id EmitSubgroupLeMask(EmitContext& ctx) {
+    return LoadMask(ctx, ctx.subgroup_mask_le);
+}
+
+Id EmitSubgroupGtMask(EmitContext& ctx) {
+    return LoadMask(ctx, ctx.subgroup_mask_gt);
+}
+
+Id EmitSubgroupGeMask(EmitContext& ctx) {
+    return LoadMask(ctx, ctx.subgroup_mask_ge);
 }
 Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 246c3b9ef..ed1e0dd3b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1628,6 +1628,26 @@ U32 IREmitter::SubgroupBallot(const U1& value) {
 return Inst<U32>(Opcode::SubgroupBallot, value);
 }
+U32 IREmitter::SubgroupEqMask() {
+    return Inst<U32>(Opcode::SubgroupEqMask);
+}
+
+U32 IREmitter::SubgroupLtMask() {
+    return Inst<U32>(Opcode::SubgroupLtMask);
+}
+
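+// A reference sketch of the lane arithmetic these masks expose, assuming a
+// 32-wide warp where lane is the calling thread's index within the warp:
+//   const u32 eq{1u << lane}; // SubgroupEqMask
+//   const u32 lt{eq - 1u};    // SubgroupLtMask: all lanes below this one
+//   const u32 le{lt | eq};    // SubgroupLeMask
+//   const u32 gt{~le};        // SubgroupGtMask
+//   const u32 ge{~lt};        // SubgroupGeMask
+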
+U32 IREmitter::SubgroupLeMask() {
+    return Inst<U32>(Opcode::SubgroupLeMask);
+}
+
+U32 IREmitter::SubgroupGtMask() {
+    return Inst<U32>(Opcode::SubgroupGtMask);
+}
+
+U32 IREmitter::SubgroupGeMask() {
+    return Inst<U32>(Opcode::SubgroupGeMask);
+}
+
 U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
 const IR::U32& seg_mask) {
 return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1b00c548d..42756af43 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -281,6 +281,11 @@ public:
 [[nodiscard]] U1 VoteAny(const U1& value);
 [[nodiscard]] U1 VoteEqual(const U1& value);
 [[nodiscard]] U32 SubgroupBallot(const U1& value);
+    [[nodiscard]] U32 SubgroupEqMask();
+    [[nodiscard]] U32 SubgroupLtMask();
+    [[nodiscard]] U32 SubgroupLeMask();
+    [[nodiscard]] U32 SubgroupGtMask();
+    [[nodiscard]] U32 SubgroupGeMask();
 [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask);
 [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dcd54bcf7..1697de965 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -417,6 +417,11 @@ OPCODE(VoteAll, U1, U1, )
 OPCODE(VoteAny, U1, U1, )
 OPCODE(VoteEqual, U1, U1, )
 OPCODE(SubgroupBallot, U32, U1, )
+OPCODE(SubgroupEqMask, U32, )
+OPCODE(SubgroupLtMask, U32, )
+OPCODE(SubgroupLeMask, U32, )
+OPCODE(SubgroupGtMask, U32, )
+OPCODE(SubgroupGeMask, U32, )
 OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
 OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
 OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 7d9c42a83..be1f21e7b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -10,6 +10,7 @@ namespace Shader::Maxwell {
 namespace {
 enum class SpecialRegister : u64 {
 SR_LANEID = 0,
+    SR_CLOCK = 1,
 SR_VIRTCFG = 2,
 SR_VIRTID = 3,
 SR_PM0 = 4,
@@ -20,6 +21,9 @@ SR_PM5 = 9,
 SR_PM6 = 10,
 SR_PM7 = 11,
+    SR12 = 12,
+    SR13 = 13,
+    SR14 = 14,
 SR_ORDERING_TICKET = 15,
 SR_PRIM_TYPE = 16,
 SR_INVOCATION_ID = 17,
@@ -41,44 +45,70 @@ SR_TID_X = 33,
 SR_TID_Y = 34,
 SR_TID_Z = 35,
+    SR_CTA_PARAM = 36,
 SR_CTAID_X = 37,
 SR_CTAID_Y = 38,
 SR_CTAID_Z = 39,
-    SR_NTID = 49,
-    SR_CirQueueIncrMinusOne = 50,
-    SR_NLATC = 51,
-    SR_SWINLO = 57,
-    SR_SWINSZ = 58,
-    SR_SMEMSZ = 59,
-    SR_SMEMBANKS = 60,
-    SR_LWINLO = 61,
-    SR_LWINSZ = 62,
-    SR_LMEMLOSZ = 63,
-    SR_LMEMHIOFF = 64,
-    SR_EQMASK = 65,
-    SR_LTMASK = 66,
-    SR_LEMASK = 67,
-    SR_GTMASK = 68,
-    SR_GEMASK = 69,
-    SR_REGALLOC = 70,
-    SR_GLOBALERRORSTATUS = 73,
-    SR_WARPERRORSTATUS = 75,
-    SR_PM_HI0 = 81,
-    SR_PM_HI1 = 82,
-    SR_PM_HI2 = 83,
-    SR_PM_HI3 = 84,
-    SR_PM_HI4 = 85,
-    SR_PM_HI5 = 86,
-    SR_PM_HI6 = 87,
-    SR_PM_HI7 = 88,
-    SR_CLOCKLO = 89,
-    SR_CLOCKHI = 90,
-    SR_GLOBALTIMERLO = 91,
-    SR_GLOBALTIMERHI = 92,
-    SR_HWTASKID = 105,
-    SR_CIRCULARQUEUEENTRYINDEX = 106,
-    SR_CIRCULARQUEUEENTRYADDRESSLOW = 107,
-
SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108,
+    SR_NTID = 40,
+    SR_CirQueueIncrMinusOne = 41,
+    SR_NLATC = 42,
+    SR43 = 43,
+    SR_SM_SPA_VERSION = 44,
+    SR_MULTIPASSSHADERINFO = 45,
+    SR_LWINHI = 46,
+    SR_SWINHI = 47,
+    SR_SWINLO = 48,
+    SR_SWINSZ = 49,
+    SR_SMEMSZ = 50,
+    SR_SMEMBANKS = 51,
+    SR_LWINLO = 52,
+    SR_LWINSZ = 53,
+    SR_LMEMLOSZ = 54,
+    SR_LMEMHIOFF = 55,
+    SR_EQMASK = 56,
+    SR_LTMASK = 57,
+    SR_LEMASK = 58,
+    SR_GTMASK = 59,
+    SR_GEMASK = 60,
+    SR_REGALLOC = 61,
+    SR_BARRIERALLOC = 62,
+    SR63 = 63,
+    SR_GLOBALERRORSTATUS = 64,
+    SR65 = 65,
+    SR_WARPERRORSTATUS = 66,
+    SR_WARPERRORSTATUSCLEAR = 67,
+    SR68 = 68,
+    SR69 = 69,
+    SR70 = 70,
+    SR71 = 71,
+    SR_PM_HI0 = 72,
+    SR_PM_HI1 = 73,
+    SR_PM_HI2 = 74,
+    SR_PM_HI3 = 75,
+    SR_PM_HI4 = 76,
+    SR_PM_HI5 = 77,
+    SR_PM_HI6 = 78,
+    SR_PM_HI7 = 79,
+    SR_CLOCKLO = 80,
+    SR_CLOCKHI = 81,
+    SR_GLOBALTIMERLO = 82,
+    SR_GLOBALTIMERHI = 83,
+    SR84 = 84,
+    SR85 = 85,
+    SR86 = 86,
+    SR87 = 87,
+    SR88 = 88,
+    SR89 = 89,
+    SR90 = 90,
+    SR91 = 91,
+    SR92 = 92,
+    SR93 = 93,
+    SR94 = 94,
+    SR95 = 95,
+    SR_HWTASKID = 96,
+    SR_CIRCULARQUEUEENTRYINDEX = 97,
+    SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
+    SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
 };
 [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
@@ -103,6 +133,16 @@ enum class SpecialRegister : u64 {
 return ir.Imm32(Common::BitCast<u32>(1.0f));
 case SpecialRegister::SR_LANEID:
 return ir.LaneId();
+    case SpecialRegister::SR_EQMASK:
+        return ir.SubgroupEqMask();
+    case SpecialRegister::SR_LTMASK:
+        return ir.SubgroupLtMask();
+    case SpecialRegister::SR_LEMASK:
+        return ir.SubgroupLeMask();
+    case SpecialRegister::SR_GTMASK:
+        return ir.SubgroupGtMask();
+    case SpecialRegister::SR_GEMASK:
+        return ir.SubgroupGeMask();
 default:
 throw NotImplementedException("S2R special register {}", special_register);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 07f031ea6..0f870535b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -414,6 +414,13 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
 break;
 }
+    case IR::Opcode::SubgroupEqMask:
+    case IR::Opcode::SubgroupLtMask:
+    case IR::Opcode::SubgroupLeMask:
+    case IR::Opcode::SubgroupGtMask:
+    case IR::Opcode::SubgroupGeMask:
+        info.uses_subgroup_mask = true;
+        break;
 case IR::Opcode::VoteAll:
 case IR::Opcode::VoteAny:
 case IR::Opcode::VoteEqual:
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d4d039eaf..9551a124f 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h
@@ -99,6 +99,7 @@ struct Info {
 bool uses_sparse_residency{};
 bool uses_demote_to_helper_invocation{};
 bool uses_subgroup_vote{};
+    bool uses_subgroup_mask{};
 bool uses_fswzadd{};
 IR::Type used_constant_buffer_types{};
From ffca21487f9728015a2c036fa581ead7d3d074d9 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 4 Apr 2021 05:18:09 -0300
Subject: [PATCH 181/770] shader: Eliminate orphan blocks more efficiently
--- .../frontend/maxwell/program.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 7b08f11b0..05b7591bc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++
b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -14,20 +14,20 @@
 #include "shader_recompiler/ir_opt/passes.h"
 namespace Shader::Maxwell {
-
-static void RemoveUnreachableBlocks(IR::Program& program) {
+namespace {
+void RemoveUnreachableBlocks(IR::Program& program) {
 // Some blocks might be unreachable if a function call exists unconditionally
 // If this happens the number of blocks and post order blocks will mismatch
 if (program.blocks.size() == program.post_order_blocks.size()) {
 return;
 }
-    const IR::BlockList& post_order{program.post_order_blocks};
-    std::erase_if(program.blocks, [&](IR::Block* block) {
-        return std::ranges::find(post_order, block) == post_order.end();
-    });
+    const auto begin{std::next(program.blocks.begin())};
+    const auto end{program.blocks.end()};
+    const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }};
+    program.blocks.erase(std::remove_if(begin, end, pred), end);
 }
-static void CollectInterpolationInfo(Environment& env, IR::Program& program) {
+void CollectInterpolationInfo(Environment& env, IR::Program& program) {
 if (program.stage != Stage::Fragment) {
 return;
 }
@@ -60,6 +60,7 @@ static void CollectInterpolationInfo(Environment& env, IR::Program& program) {
 }();
 }
 }
+} // Anonymous namespace
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg) {
From 9e6fe430bdc615ae5f7cc4fbc32d7e2baccd7ceb Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 4 Apr 2021 20:00:34 -0300
Subject: [PATCH 182/770] shader: Fix splits on blocks using indirect branches
--- .../frontend/maxwell/control_flow.cpp | 35 ++++++++++++++++--- .../frontend/maxwell/control_flow.h | 18 ++++------ .../frontend/maxwell/program.cpp | 2 +- 3 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index ac8707847..eb0f7c8d1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -45,19 +45,29 @@ void Split(Block* old_block, Block* new_block, Location pc) {
 .begin{pc},
 .end{old_block->end},
 .end_class{old_block->end_class},
-        .stack{old_block->stack},
 .cond{old_block->cond},
+        .stack{old_block->stack},
 .branch_true{old_block->branch_true},
 .branch_false{old_block->branch_false},
+        .function_call{old_block->function_call},
+        .return_block{old_block->return_block},
+        .branch_reg{old_block->branch_reg},
+        .branch_offset{old_block->branch_offset},
+        .indirect_branches{std::move(old_block->indirect_branches)},
 };
 *old_block = Block{
 .begin{old_block->begin},
 .end{pc},
 .end_class{EndClass::Branch},
-        .stack{std::move(old_block->stack)},
 .cond{true},
+        .stack{std::move(old_block->stack)},
 .branch_true{new_block},
 .branch_false{nullptr},
+        .function_call{},
+        .return_block{},
+        .branch_reg{},
+        .branch_offset{},
+        .indirect_branches{},
 };
 }
@@ -173,10 +183,15 @@ Function::Function(ObjectPool<Block>& block_pool, Location start_address)
 .begin{start_address},
 .end{start_address},
 .end_class{EndClass::Branch},
-        .stack{},
 .cond{true},
+        .stack{},
 .branch_true{nullptr},
 .branch_false{nullptr},
+        .function_call{},
+        .return_block{},
+        .branch_reg{},
+        .branch_offset{},
+        .indirect_branches{},
 })},
 .stack{},
 }} {}
@@ -351,10 +366,15 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
 .begin{block->begin.Virtual()},
 .end{block->begin.Virtual()},
 .end_class{EndClass::Branch},
-        .stack{block->stack},
.cond{cond},
+        .stack{block->stack},
 .branch_true{conditional_block},
 .branch_false{nullptr},
+        .function_call{},
+        .return_block{},
+        .branch_reg{},
+        .branch_offset{},
+        .indirect_branches{},
 };
 // Save the contents of the visited block in the conditional block
 *conditional_block = std::move(*block);
@@ -502,10 +522,15 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function
 .begin{pc},
 .end{pc},
 .end_class{EndClass::Branch},
-        .stack{stack},
 .cond{true},
+        .stack{stack},
 .branch_true{nullptr},
 .branch_false{nullptr},
+        .function_call{},
+        .return_block{},
+        .branch_reg{},
+        .branch_offset{},
+        .indirect_branches{},
 })};
 function.labels.push_back(Label{
 .address{pc},
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index a8c90d27a..466b14198 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -79,18 +79,14 @@ struct Block : boost::intrusive::set_base_hook<
 Location begin;
 Location end;
 EndClass end_class;
-    Stack stack;
 IR::Condition cond;
-    union {
-        Block* branch_true;
-        FunctionId function_call;
-        IR::Reg branch_reg;
-    };
-    union {
-        Block* branch_false;
-        Block* return_block;
-        s32 branch_offset;
-    };
+    Stack stack;
+    Block* branch_true;
+    Block* branch_false;
+    FunctionId function_call;
+    Block* return_block;
+    IR::Reg branch_reg;
+    s32 branch_offset;
 std::vector<IndirectBranch> indirect_branches;
 };
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 05b7591bc..58caa35a1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -21,7 +21,7 @@ void RemoveUnreachableBlocks(IR::Program& program) {
 if (program.blocks.size() == program.post_order_blocks.size()) {
 return;
 }
-    const auto begin{std::next(program.blocks.begin())};
+    const auto begin{program.blocks.begin() + 1};
 const auto end{program.blocks.end()};
 const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }};
 program.blocks.erase(std::remove_if(begin, end, pred), end);
From 72daa2a039d58d23b0dca035bb5f6af8b10ce97b Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Mon, 5 Apr 2021 08:56:58 +0200
Subject: [PATCH 183/770] shader: Fix ShadowCube declaration type, set number of pipeline threads based on hardware
--- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 5ef637fe7..002b305dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -45,7 +45,7 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
 case TextureType::ShadowCube:
 return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
 case TextureType::ShadowArrayCube:
-        return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
+        return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format);
 }
 throw InvalidArgument("Invalid texture type {}", desc.type);
 }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f88ab67ae..088de7001 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -6,6 +6,7 @@
 #include <cstddef>
 #include <memory>
 #include <optional>
+#include <thread>
 #include <vector>
 #include "common/bit_cast.h"
@@ -607,7 +608,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
 scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
 buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
-      workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") {
+      workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"),
+      serialization_thread(1, "yuzu:PipelineSerialization") {
 const auto& float_control{device.FloatControlProperties()};
 const VkDriverIdKHR driver_id{device.GetDriverID()};
 base_profile = Shader::Profile{
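A note on the worker count introduced above: std::thread::hardware_concurrency() is allowed to return 0 when the value is not computable, so sizing a pool from it needs a floor. A hedged sketch of a robust helper (an illustration; PipelineWorkerCount is a hypothetical name, not part of the patch):

    #include <algorithm>
    #include <thread>

    // Keep at least one worker and leave a core for the render and
    // serialization threads, even when hardware_concurrency() reports 0.
    unsigned PipelineWorkerCount() {
        return std::max(std::thread::hardware_concurrency(), 2U) - 1U;
    }

A later patch in this series applies exactly this clamp to the pipeline builder.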
From 417fb5d385daa0fb40329709e6b4a53937580989 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 5 Apr 2021 19:10:55 -0300
Subject: [PATCH 184/770] shader: Move recursive SSA rewrite to the heap
--- .../ir_opt/ssa_rewrite_pass.cpp | 118 +++++++++++++----- 1 file changed, 89 insertions(+), 29 deletions(-)
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 259233746..ca36253d1 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -119,6 +119,26 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
 return inst.Opcode() == IR::Opcode::Phi;
 }
+enum class Status {
+    Start,
+    SetValue,
+    PreparePhiArgument,
+    PushPhiArgument,
+};
+
+template <typename Type>
+struct ReadState {
+    ReadState(IR::Block* block_) : block{block_} {}
+    ReadState() = default;
+
+    IR::Block* block{};
+    IR::Value result{};
+    IR::Inst* phi{};
+    IR::Block* const* pred_it{};
+    IR::Block* const* pred_end{};
+    Status pc{Status::Start};
+};
+
 class Pass {
 public:
 template <typename Type>
@@ -127,12 +147,75 @@ public:
 }
 template <typename Type>
-    IR::Value ReadVariable(Type variable, IR::Block* block) {
-        const ValueMap& def{current_def[variable]};
-        if (const auto it{def.find(block)}; it != def.end()) {
-            return it->second;
-        }
-        return ReadVariableRecursive(variable, block);
+    IR::Value ReadVariable(Type variable, IR::Block* root_block) {
+        boost::container::small_vector<ReadState<Type>, 64> stack{
+            ReadState<Type>(nullptr),
+            ReadState<Type>(root_block),
+        };
+        const auto prepare_phi_operand{[&] {
+            if (stack.back().pred_it == stack.back().pred_end) {
+                IR::Inst* const phi{stack.back().phi};
+                IR::Block* const block{stack.back().block};
+                const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
+                stack.pop_back();
+                stack.back().result = result;
+                WriteVariable(variable, block, result);
+            } else {
+                IR::Block* const imm_pred{*stack.back().pred_it};
+                stack.back().pc = Status::PushPhiArgument;
+                stack.emplace_back(imm_pred);
+            }
+        }};
+        do {
+            IR::Block* const block{stack.back().block};
+            switch (stack.back().pc) {
+            case Status::Start: {
+                const ValueMap& def{current_def[variable]};
+                if (const auto it{def.find(block)}; it != def.end()) {
+                    stack.back().result = it->second;
+                } else if (!sealed_blocks.contains(block)) {
+                    // Incomplete CFG
+                    IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    incomplete_phis[block].insert_or_assign(variable, phi);
+                    stack.back().result = IR::Value{&*phi};
+                } else if (const std::span imm_preds{block->ImmediatePredecessors()};
+                           imm_preds.size() == 1) {
+                    // Optimize the common case of one predecessor: no phi needed
+                    stack.back().pc = Status::SetValue;
+
stack.emplace_back(imm_preds.front());
+                    break;
+                } else {
+                    // Break potential cycles with operandless phi
+                    IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
+                    WriteVariable(variable, block, IR::Value{phi});
+
+                    stack.back().phi = phi;
+                    stack.back().pred_it = imm_preds.data();
+                    stack.back().pred_end = imm_preds.data() + imm_preds.size();
+                    prepare_phi_operand();
+                    break;
+                }
+            }
+                [[fallthrough]];
+            case Status::SetValue: {
+                const IR::Value result{stack.back().result};
+                WriteVariable(variable, block, result);
+                stack.pop_back();
+                stack.back().result = result;
+                break;
+            }
+            case Status::PushPhiArgument: {
+                IR::Inst* const phi{stack.back().phi};
+                phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
+                ++stack.back().pred_it;
+            }
+                [[fallthrough]];
+            case Status::PreparePhiArgument:
+                prepare_phi_operand();
+                break;
+            }
+        } while (stack.size() > 1);
+        return stack.back().result;
 }
 void SealBlock(IR::Block* block) {
@@ -146,29 +229,6 @@ public:
 }
 private:
-    template <typename Type>
-    IR::Value ReadVariableRecursive(Type variable, IR::Block* block) {
-        IR::Value val;
-        if (!sealed_blocks.contains(block)) {
-            // Incomplete CFG
-            IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
-            incomplete_phis[block].insert_or_assign(variable, phi);
-            val = IR::Value{&*phi};
-        } else if (const std::span imm_preds{block->ImmediatePredecessors()};
-                   imm_preds.size() == 1) {
-            // Optimize the common case of one predecessor: no phi needed
-            val = ReadVariable(variable, imm_preds.front());
-        } else {
-            // Break potential cycles with operandless phi
-            IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
-            val = IR::Value{&phi_inst};
-            WriteVariable(variable, block, val);
-            val = AddPhiOperands(variable, phi_inst, block);
-        }
-        WriteVariable(variable, block, val);
-        return val;
-    }
-
 template <typename Type>
 IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
 for (IR::Block* const imm_pred : block->ImmediatePredecessors()) {
From bfeeb23ddce9f3531a834c257bd8af05c42ed194 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 5 Apr 2021 19:15:45 -0300
Subject: [PATCH 185/770] vk_pipeline_cache: Fix num of pipeline workers on weird platforms
--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 088de7001..25f592b8a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -608,7 +608,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
 scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
 buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
-      workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"),
+      workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
 serialization_thread(1, "yuzu:PipelineSerialization") {
 const auto& float_control{device.FloatControlProperties()};
 const VkDriverIdKHR driver_id{device.GetDriverID()};
 base_profile = Shader::Profile{
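The heap-based ReadVariable above follows the usual recipe for converting recursion to an explicit stack: every frame keeps its locals plus a resume point, and a return is modeled by writing into the parent frame before popping. A toy sketch of the same pattern (illustrative only, not from the patch):

    #include <vector>

    struct Frame {
        int n{};
        int result{};
        enum class Pc { Start, Combine } pc{Pc::Start};
    };

    // Iterative equivalent of: f(n) = n <= 1 ? 1 : n * f(n - 1)
    int Factorial(int n) {
        std::vector<Frame> stack{Frame{}, Frame{.n = n}};
        while (stack.size() > 1) {
            Frame& frame{stack.back()};
            switch (frame.pc) {
            case Frame::Pc::Start:
                if (frame.n <= 1) {
                    stack[stack.size() - 2].result = 1; // "return 1"
                    stack.pop_back();
                } else {
                    frame.pc = Frame::Pc::Combine; // resume here after the "call"
                    stack.push_back(Frame{.n = frame.n - 1});
                }
                break;
            case Frame::Pc::Combine:
                stack[stack.size() - 2].result = frame.n * frame.result; // "return n * f(n - 1)"
                stack.pop_back();
                break;
            }
        }
        return stack.front().result;
    }

The bottom sentinel frame plays the same role as the pass's ReadState(nullptr) entry: it gives the final pop somewhere to write its result.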
From 20ba0ea0a94fa915cad6392b3742d8e58e2fa0d9 Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Tue, 6 Apr 2021 02:01:01 +0200
Subject: [PATCH 186/770] shader: Fix BRX tracking
--- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 6 +++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index eb0f7c8d1..1a4ee4f6c 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -197,7 +197,7 @@ Function::Function(ObjectPool<Block>& block_pool, Location start_address)
 }} {}
 CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address)
-    : env{env_}, block_pool{block_pool_} {
+    : env{env_}, block_pool{block_pool_}, program_start{start_address} {
 functions.emplace_back(block_pool, start_address);
 for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
 while (!functions[function_id].labels.empty()) {
@@ -427,9 +427,9 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct
 CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, FunctionId function_id) {
-    const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)};
+    const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
 if (!brx_table) {
-        TrackIndirectBranchTable(env, pc, block->begin);
+        TrackIndirectBranchTable(env, pc, program_start);
 throw NotImplementedException("Failed to track indirect branch");
 }
 const IR::FlowTest flow_test{inst.branch.flow_test};
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 466b14198..9f570fbb5 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -157,6 +157,7 @@ private:
 ObjectPool<Block>& block_pool;
 boost::container::small_vector<Function, 1> functions;
 FunctionId current_function_id{0};
+    Location program_start;
 };
 } // namespace Shader::Maxwell::Flow
From 0df7e509db060693ee1f131bae44045db995c3bd Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Sun, 4 Apr 2021 02:42:58 +0200
Subject: [PATCH 187/770] shader: Implement AL2P
--- src/shader_recompiler/CMakeLists.txt | 1 + .../impl/attribute_memory_to_physical.cpp | 35 +++++++++++++++++++ .../translate/impl/not_implemented.cpp | 4 --- 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 700b17113..22639fe13 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
 frontend/maxwell/program.h
 frontend/maxwell/structured_control_flow.cpp
 frontend/maxwell/structured_control_flow.h
+    frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
 frontend/maxwell/translate/impl/barrier_operations.cpp
 frontend/maxwell/translate/impl/bitfield_extract.cpp
 frontend/maxwell/translate/impl/bitfield_insert.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
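+
+// Descriptive note: AL2P ("attribute logical to physical") produces the
+// physical address of an attribute. The translator below simply adds the
+// instruction's immediate offset to the indexing register; later indexed
+// attribute accesses consume the resulting address.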
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/opcodes.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+
+enum class BitSize : u64 {
+    B32,
+    B64,
+    B96,
+    B128,
+};
+
+void TranslatorVisitor::AL2P(u64 inst) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> result_register;
+        BitField<8, 8, IR::Reg> indexing_register;
+        BitField<20, 11, s64> offset;
+        BitField<47, 2, BitSize> bitsize;
+    } al2p{inst};
+    if (al2p.bitsize != BitSize::B32) {
+        throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
+    }
+    const IR::U32 converted_offset{ir.Imm32(static_cast<s32>(al2p.offset.Value()))};
+    const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
+    X(al2p.result_register, result);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index acabb0118..ba0cfa673 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -13,10 +13,6 @@ namespace Shader::Maxwell {
 throw NotImplementedException("Instruction {} is not implemented", opcode);
 }
-void TranslatorVisitor::AL2P(u64) {
-    ThrowNotImplemented(Opcode::AL2P);
-}
-
 void TranslatorVisitor::ATOM_cas(u64) {
 ThrowNotImplemented(Opcode::ATOM_cas);
 }
From 1d51803169f72f79e19995072fb9e8a371dbdcbf Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Sun, 4 Apr 2021 06:47:14 +0200
Subject: [PATCH 188/770] shader: Implement indexed attributes
--- .../backend/spirv/emit_context.cpp | 124 ++++++++++++++++++ .../backend/spirv/emit_context.h | 4 + .../backend/spirv/emit_spirv.h | 4 +- .../spirv/emit_spirv_context_get_set.cpp | 8 +- .../frontend/ir/ir_emitter.cpp | 8 ++ .../frontend/ir/ir_emitter.h | 3 + .../frontend/maxwell/program.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 38 ++++-- .../ir_opt/collect_shader_info_pass.cpp | 36 ++++- src/shader_recompiler/ir_opt/passes.h | 2 +- src/shader_recompiler/program_header.h | 83 ++++++++++-- src/shader_recompiler/shader_info.h | 2 + 12 files changed, 279 insertions(+), 35 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 002b305dc..eadecb064 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -82,6 +82,28 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
 }
 throw InvalidArgument("Invalid attribute type {}", type);
 }
+
+struct AttrInfo {
+    Id pointer;
+    Id id;
+    bool needs_cast;
+};
+
+std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
+    const AttributeType type{ctx.profile.generic_input_types.at(index)};
+    switch (type) {
+    case AttributeType::Float:
+        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+    case AttributeType::UnsignedInt:
+        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+    case AttributeType::SignedInt:
+        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+    case AttributeType::Disabled:
+        return std::nullopt;
+    }
+    throw InvalidArgument("Invalid attribute type {}", type);
+}
+
 } // Anonymous namespace
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -107,6 +129,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding)
 DefineConstantBuffers(program.info, binding);
 DefineStorageBuffers(program.info, binding);
 DefineTextures(program.info, binding);
+    DefineAttributeMemAccess(program.info);
 DefineLabels(program);
 }
@@ -290,6 +313,107 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
 }
 }
+void EmitContext::DefineAttributeMemAccess(const Info& info) {
+    const auto make_load{[&]() {
+        const Id end_block{OpLabel()};
+        const Id default_label{OpLabel()};
+
+        const Id func_type_load{TypeFunction(F32[1], U32[1])};
+        const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
+        const Id offset{OpFunctionParameter(U32[1])};
+        AddLabel();
+        const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))};
+        const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))};
+        const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
+        std::vector<Sirit::Literal> literals;
+        std::vector<Id> labels;
+        const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+        for (u32 i = 0; i < info.input_generics.size(); i++) {
+            if (!info.input_generics[i].used) {
+                continue;
+            }
+            literals.push_back(base_attribute_value + i);
+            labels.push_back(OpLabel());
+        }
+        OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+        OpSwitch(compare_index, default_label, literals, labels);
+        AddLabel(default_label);
+        OpReturnValue(Constant(F32[1], 0.0f));
+        size_t label_index = 0;
+        for (u32 i = 0; i < info.input_generics.size(); i++) {
+            if (!info.input_generics[i].used) {
+                continue;
+            }
+            AddLabel(labels[label_index]);
+            const auto type{AttrTypes(*this, i)};
+            if (!type) {
+                OpReturnValue(Constant(F32[1], 0.0f));
+                label_index++;
+                continue;
+            }
+            const Id generic_id{input_generics.at(i)};
+            const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)};
+            const Id value{OpLoad(type->id, pointer)};
+            const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+            OpReturnValue(result);
+            label_index++;
+        }
+        AddLabel(end_block);
+        OpUnreachable();
+        OpFunctionEnd();
+        return func;
+    }};
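+
+    // Conceptually the generated function behaves like this sketch
+    // (pseudocode, an illustration only):
+    //   float IndexedLoad(uint offset) {
+    //       switch ((offset / 4) / 4) {
+    //       case GenericN: return input_generic[N][(offset / 4) % 4];
+    //       default: return 0.0f;
+    //       }
+    //   }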
+    const auto make_store{[&]() {
+        const Id end_block{OpLabel()};
+        const Id default_label{OpLabel()};
+
+        const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
+        const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
+        const Id offset{OpFunctionParameter(U32[1])};
+        const Id store_value{OpFunctionParameter(F32[1])};
+        AddLabel();
+        const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))};
+        const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))};
+        const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
+        std::vector<Sirit::Literal> literals;
+        std::vector<Id> labels;
+        const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
+        for (u32 i = 0; i < info.stores_generics.size(); i++) {
+            if (!info.stores_generics[i]) {
+                continue;
+            }
+            literals.push_back(base_attribute_value + i);
+            labels.push_back(OpLabel());
+        }
+        OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
+        OpSwitch(compare_index, default_label, literals, labels);
+        AddLabel(default_label);
+        OpReturn();
+        size_t label_index = 0;
+        for (u32 i = 0; i < info.stores_generics.size(); i++) {
+            if (!info.stores_generics[i]) {
+                continue;
+            }
+            AddLabel(labels[label_index]);
+            const Id generic_id{output_generics.at(i)};
+            const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
+            OpStore(pointer, store_value);
+            OpReturn();
+            label_index++;
+        }
+        AddLabel(end_block);
+        OpUnreachable();
+        OpFunctionEnd();
+        return func;
+    }};
+    if (info.loads_indexed_attributes) {
+        indexed_load_func = make_load();
+    }
+    if (info.stores_indexed_attributes) {
+        indexed_store_func = make_store();
+    }
+}
+
 void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
 if (info.constant_buffer_descriptors.empty()) {
 return;
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 03c5a6aba..7a2ac0511 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -116,6 +116,9 @@ public:
 Id fswzadd_lut_a{};
 Id fswzadd_lut_b{};
+    Id indexed_load_func{};
+    Id indexed_store_func{};
+
 Id local_memory{};
 Id shared_memory_u8{};
@@ -148,6 +151,7 @@ private:
 void DefineConstantBuffers(const Info& info, u32& binding);
 void DefineStorageBuffers(const Info& info, u32& binding);
 void DefineTextures(const Info& info, u32& binding);
+    void DefineAttributeMemAccess(const Info& info);
 void DefineLabels(IR::Program& program);
 void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 712c5e61f..08460c94e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -51,8 +51,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
-void EmitGetAttributeIndexed(EmitContext& ctx);
-void EmitSetAttributeIndexed(EmitContext& ctx);
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id
offset);
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value);
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
 void EmitSetFragDepth(EmitContext& ctx, Id value);
 void EmitGetZFlag(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1bfc60294..a60eca815 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -216,12 +216,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
 ctx.OpStore(*output, value);
 }
-void EmitGetAttributeIndexed(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) {
+    return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
 }
-void EmitSetAttributeIndexed(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) {
+    ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
 }
 void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ed1e0dd3b..e4e9b260c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -307,6 +307,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
 Inst(Opcode::SetAttribute, attribute, value);
 }
+F32 IREmitter::GetAttributeIndexed(IR::U32 phys_address) {
+    return Inst<F32>(Opcode::GetAttributeIndexed, phys_address);
+}
+
+void IREmitter::SetAttributeIndexed(IR::U32 phys_address, const F32& value) {
+    Inst(Opcode::SetAttributeIndexed, phys_address, value);
+}
+
 void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
 Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 42756af43..afa8bd924 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -76,6 +76,9 @@ public:
 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
 void SetAttribute(IR::Attribute attribute, const F32& value);
+    [[nodiscard]] F32 GetAttributeIndexed(IR::U32 phys_address);
+    void SetAttributeIndexed(IR::U32 phys_address, const F32& value);
+
 void SetFragColor(u32 index, u32 component, const F32& value);
 void SetFragDepth(const F32& value);
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 58caa35a1..aaf2a74a7 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -87,7 +87,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-    Optimization::CollectShaderInfoPass(program);
+    Optimization::CollectShaderInfoPass(env, program);
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
...
 for (IR::Inst& inst : block->Instructions()) {
 Visit(info, inst);
 }
 }
+    GatherInfoFromHeader(env, info);
 }
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 5c1fc166c..186104713 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h
@@ -12,7 +12,7 @@ namespace Shader::Optimization {
-void CollectShaderInfoPass(IR::Program& program);
+void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void
ConstantPropagationPass(IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 1544bfa42..ce65fc1a4 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -68,10 +68,24 @@ struct ProgramHeader { union { struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 1, u8> x; + BitField<1, 1, u8> y; + BitField<2, 1, u8> z; + BitField<3, 1, u8> w; + BitField<4, 1, u8> x2; + BitField<5, 1, u8> y2; + BitField<6, 1, u8> z2; + BitField<7, 1, u8> w2; + BitField<0, 4, u8> first; + BitField<4, 4, u8> second; + u8 raw; + } imap_generic_vector[16]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { BitField<0, 8, u16> clip_distances; BitField<8, 1, u16> point_sprite_s; @@ -82,15 +96,54 @@ struct ProgramHeader { BitField<14, 1, u16> instance_id; BitField<15, 1, u16> vertex_id; }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + + union { + BitField<0, 1, u8> x; + BitField<1, 1, u8> y; + BitField<2, 1, u8> z; + BitField<3, 1, u8> w; + BitField<4, 1, u8> x2; + BitField<5, 1, u8> y2; + BitField<6, 1, u8> z2; + BitField<7, 1, u8> w2; + BitField<0, 4, u8> first; + BitField<4, 4, u8> second; + u8 raw; + } omap_generic_vector[16]; + + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + } omap_systemc; + + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + + [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { + if ((index & 1) == 0) { + return imap_generic_vector[index >> 1].first != 0; + } + return imap_generic_vector[index >> 1].second != 0; + } + + [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { + if ((index & 1) == 0) { + return omap_generic_vector[index >> 1].first != 0; + } + return omap_generic_vector[index >> 1].second != 0; + } } vtg; struct { @@ -128,6 +181,10 @@ struct ProgramHeader { const auto& vector{imap_generic_vector[attribute]}; return {vector.x, vector.y, vector.z, vector.w}; } + + [[nodiscard]] bool IsGenericVectorActive(size_t index) const { + return imap_generic_vector[index].raw != 0; + } } ps; 
std::array raw;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9551a124f..41bb5b9a1 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h
@@ -76,6 +76,7 @@ struct Info {
 bool loads_vertex_id{};
 bool loads_front_face{};
 bool loads_point_coord{};
+    bool loads_indexed_attributes{};
 std::array<bool, 8> stores_frag_color{};
 bool stores_frag_depth{};
@@ -84,6 +85,7 @@ struct Info {
 bool stores_point_size{};
 bool stores_clip_distance{};
 bool stores_viewport_index{};
+    bool stores_indexed_attributes{};
 bool uses_fp16{};
 bool uses_fp64{};
From 73cb17f41bf019df504d2d2af4ebdf45aa3201c6 Mon Sep 17 00:00:00 2001
From: FernandoS27
Date: Sun, 4 Apr 2021 09:38:15 +0200
Subject: [PATCH 189/770] shader: Implement indexed Position and ClipDistances
--- .../backend/spirv/emit_context.cpp | 40 +++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 26 +++++++---- src/shader_recompiler/program_header.h | 45 +++++++++++++++++-- 3 files changed, 100 insertions(+), 11 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index eadecb064..e22bb5371 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -327,6 +327,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
 const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
 std::vector<Sirit::Literal> literals;
 std::vector<Id> labels;
+        if (info.loads_position) {
+            literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+            labels.push_back(OpLabel());
+        }
 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
 for (u32 i = 0; i < info.input_generics.size(); i++) {
 if (!info.input_generics[i].used) {
@@ -340,6 +344,12 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
 AddLabel(default_label);
 OpReturnValue(Constant(F32[1], 0.0f));
 size_t label_index = 0;
+        if (info.loads_position) {
+            AddLabel(labels[label_index]);
+            const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))};
+            OpReturnValue(result);
+            label_index++;
+        }
 for (u32 i = 0; i < info.input_generics.size(); i++) {
 if (!info.input_generics[i].used) {
 continue;
@@ -377,6 +387,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
 const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
 std::vector<Sirit::Literal> literals;
 std::vector<Id> labels;
+        if (info.stores_position) {
+            literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2);
+            labels.push_back(OpLabel());
+        }
 const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
 for (u32 i = 0; i < info.stores_generics.size(); i++) {
 if (!info.stores_generics[i]) {
@@ -385,11 +399,24 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
 literals.push_back(base_attribute_value + i);
 labels.push_back(OpLabel());
 }
+        if (info.stores_clip_distance) {
+            literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2);
+            labels.push_back(OpLabel());
+            literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2);
+            labels.push_back(OpLabel());
+        }
 OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
 OpSwitch(compare_index, default_label, literals, labels);
 AddLabel(default_label);
 OpReturn();
 size_t label_index = 0;
+        if (info.stores_position) {
+            AddLabel(labels[label_index]);
+            const Id pointer{OpAccessChain(output_f32, output_position, masked_index)};
+
OpStore(pointer, store_value); + OpReturn(); + label_index++; + } for (u32 i = 0; i < info.stores_generics.size(); i++) { if (!info.stores_generics[i]) { continue; @@ -401,6 +428,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturn(); label_index++; } + if (info.stores_clip_distance) { + AddLabel(labels[label_index]); + const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; + OpStore(pointer, store_value); + OpReturn(); + label_index++; + AddLabel(labels[label_index]); + const Id fixed_index{OpIAdd(U32[1], masked_index, Constant(U32[1], 4))}; + const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; + OpStore(pointer2, store_value); + OpReturn(); + label_index++; + } AddLabel(end_block); OpUnreachable(); OpFunctionEnd(); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index dbe9f1f40..a14465598 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -517,22 +517,32 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } const auto& header = env.SPH(); if (stage == Stage::Fragment) { + if (!info.loads_indexed_attributes) { + return; + } for (size_t i = 0; i < info.input_generics.size(); i++) { info.input_generics[i].used = info.input_generics[i].used || header.ps.IsGenericVectorActive(i); } + info.loads_position = info.loads_position || header.ps.imap_systemb.position != 0; return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + if (info.loads_indexed_attributes) { + for (size_t i = 0; i < info.input_generics.size(); i++) { + info.input_generics[i].used = + info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + } } - for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] = - info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + if (info.stores_indexed_attributes) { + info.loads_position = info.loads_position || header.vtg.imap_systemb.position != 0; + for (size_t i = 0; i < info.stores_generics.size(); i++) { + info.stores_generics[i] = + info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + } + info.stores_clip_distance = + info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; + info.stores_position = info.stores_position || header.vtg.omap_systemb.position != 0; } - info.stores_clip_distance = - info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; } } // Anonymous namespace diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index ce65fc1a4..15f43f2d8 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -69,7 +69,20 @@ struct ProgramHeader { union { struct { INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + BitField<0, 4, u8> first; + BitField<4, 4, u8> position; + u8 raw; + } imap_systemb; union { BitField<0, 1, u8> x; @@ -99,7 +112,20 @@ struct ProgramHeader { 
INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + BitField<0, 4, u8> first; + BitField<4, 4, u8> position; + u8 raw; + } omap_systemb; union { BitField<0, 1, u8> x; @@ -148,7 +174,20 @@ struct ProgramHeader { struct { INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 1, u8> primitive_array_id; + BitField<1, 1, u8> rt_array_index; + BitField<2, 1, u8> viewport_index; + BitField<3, 1, u8> point_size; + BitField<4, 1, u8> position_x; + BitField<5, 1, u8> position_y; + BitField<6, 1, u8> position_z; + BitField<7, 1, u8> position_w; + BitField<0, 4, u8> first; + BitField<4, 4, u8> position; + u8 raw; + } imap_systemb; union { BitField<0, 2, PixelImap> x; From dcaf0e91508562a70a49db7011ad09f13f811d71 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 5 Apr 2021 04:03:12 +0200 Subject: [PATCH 190/770] shader: Address feedback --- .../backend/spirv/emit_context.cpp | 40 +++++++++---------- .../frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/ir_emitter.h | 4 +- .../translate/impl/load_store_attribute.cpp | 37 +++++++++-------- .../ir_opt/collect_shader_info_pass.cpp | 22 +++++----- 5 files changed, 54 insertions(+), 53 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index e22bb5371..2d39ea373 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -314,7 +314,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } void EmitContext::DefineAttributeMemAccess(const Info& info) { - const auto make_load{[&]() { + const auto make_load{[&] { const Id end_block{OpLabel()}; const Id default_label{OpLabel()}; @@ -322,9 +322,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; const Id offset{OpFunctionParameter(U32[1])}; AddLabel(); - const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; std::vector literals; std::vector labels; if (info.loads_position) { @@ -343,22 +343,22 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpSwitch(compare_index, default_label, literals, labels); AddLabel(default_label); OpReturnValue(Constant(F32[1], 0.0f)); - size_t label_index = 0; + size_t label_index{0}; if (info.loads_position) { AddLabel(labels[label_index]); const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))}; OpReturnValue(result); - label_index++; + ++label_index; } - for (u32 i = 0; i < info.input_generics.size(); i++) { + for (size_t i = 0; i < info.input_generics.size(); i++) { if 
(!info.input_generics[i].used) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, i)}; + const auto type{AttrTypes(*this, static_cast(i))}; if (!type) { OpReturnValue(Constant(F32[1], 0.0f)); - label_index++; + ++label_index; continue; } const Id generic_id{input_generics.at(i)}; @@ -366,14 +366,14 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id value{OpLoad(type->id, pointer)}; const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; OpReturnValue(result); - label_index++; + ++label_index; } AddLabel(end_block); OpUnreachable(); OpFunctionEnd(); return func; }}; - const auto make_store{[&]() { + const auto make_store{[&] { const Id end_block{OpLabel()}; const Id default_label{OpLabel()}; @@ -382,9 +382,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id offset{OpFunctionParameter(U32[1])}; const Id store_value{OpFunctionParameter(F32[1])}; AddLabel(); - const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; std::vector literals; std::vector labels; if (info.stores_position) { @@ -392,11 +392,11 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.stores_generics.size(); i++) { + for (size_t i = 0; i < info.stores_generics.size(); i++) { if (!info.stores_generics[i]) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + static_cast(i)); labels.push_back(OpLabel()); } if (info.stores_clip_distance) { @@ -409,15 +409,15 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpSwitch(compare_index, default_label, literals, labels); AddLabel(default_label); OpReturn(); - size_t label_index = 0; + size_t label_index{0}; if (info.stores_position) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); - label_index++; + ++label_index; } - for (u32 i = 0; i < info.stores_generics.size(); i++) { + for (size_t i = 0; i < info.stores_generics.size(); i++) { if (!info.stores_generics[i]) { continue; } @@ -426,20 +426,20 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); - label_index++; + ++label_index; } if (info.stores_clip_distance) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); OpReturn(); - label_index++; + ++label_index; AddLabel(labels[label_index]); const Id fixed_index{OpIAdd(U32[1], masked_index, Constant(U32[1], 4))}; const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; OpStore(pointer2, store_value); OpReturn(); - label_index++; + ++label_index; } AddLabel(end_block); OpUnreachable(); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e4e9b260c..13eb2de4c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ 
b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -307,11 +307,11 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } -F32 IREmitter::GetAttributeIndexed(IR::U32 phys_address) { +F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { return Inst(Opcode::GetAttributeIndexed, phys_address); } -void IREmitter::SetAttributeIndexed(IR::U32 phys_address, const F32& value) { +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { Inst(Opcode::SetAttributeIndexed, phys_address, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index afa8bd924..2cab1dc5d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -76,8 +76,8 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); - [[nodiscard]] F32 GetAttributeIndexed(IR::U32 phys_address); - void SetAttributeIndexed(IR::U32 phys_address, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); + void SetAttributeIndexed(const U32& phys_address, const F32& value); void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 0d248c020..f629e7167 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -44,6 +44,17 @@ u32 NumElements(Size size) { } throw InvalidArgument("Invalid size {}", size); } + +template +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + } // Anonymous namespace void TranslatorVisitor::ALD(u64 insn) { @@ -70,18 +81,15 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ald.size)}; - if (ald.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ald.index_reg); + if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? 
index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); } return; } - for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); - } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + }); } void TranslatorVisitor::AST(u64 insn) { @@ -110,18 +118,15 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ast.size)}; - if (ast.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ast.index_reg); + if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); } return; } - for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); - } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + }); } void TranslatorVisitor::IPA(u64 insn) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a14465598..1c03ee82a 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -511,37 +511,33 @@ void Visit(Info& info, IR::Inst& inst) { } void GatherInfoFromHeader(Environment& env, Info& info) { - auto stage = env.ShaderStage(); + Stage stage{env.ShaderStage()}; if (stage == Stage::Compute) { return; } - const auto& header = env.SPH(); + const auto& header{env.SPH()}; if (stage == Stage::Fragment) { if (!info.loads_indexed_attributes) { return; } for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.ps.IsGenericVectorActive(i); + info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); } - info.loads_position = info.loads_position || header.ps.imap_systemb.position != 0; + info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used = - info.input_generics[i].used || header.vtg.IsInputGenericVectorActive(i); + info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); } + info.loads_position |= header.vtg.imap_systemb.position != 0; } if (info.stores_indexed_attributes) { - info.loads_position = info.loads_position || header.vtg.imap_systemb.position != 0; for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] = - info.stores_generics[i] || header.vtg.IsOutputGenericVectorActive(i); + info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); } - info.stores_clip_distance = - info.stores_clip_distance || header.vtg.omap_systemc.clip_distances != 0; - info.stores_position = info.stores_position || header.vtg.omap_systemb.position != 0; + 
info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; + info.stores_position |= header.vtg.omap_systemb.position != 0; } } From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 02:56:15 -0300 Subject: [PATCH 191/770] shader: Implement texture buffers --- .../backend/spirv/emit_context.cpp | 29 ++++++ .../backend/spirv/emit_context.h | 5 ++ .../backend/spirv/emit_spirv.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 24 +++-- src/shader_recompiler/ir_opt/texture_pass.cpp | 88 ++++++++++++++----- src/shader_recompiler/shader_info.h | 9 ++ .../renderer_vulkan/pipeline_helper.h | 10 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 15 ++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 10 files changed, 158 insertions(+), 39 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d39ea373..d01633628 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -46,6 +46,8 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); case TextureType::ShadowArrayCube: return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format); + case TextureType::Buffer: + break; } throw InvalidArgument("Invalid texture type {}", desc.type); } @@ -129,6 +131,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); + DefineTextureBuffers(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -541,6 +544,32 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + texture_buffers.insert(texture_buffers.end(), desc.count, id); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 7a2ac0511..2a10e94e5 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -90,9 +90,13 @@ public: Id storage_u32{}; + Id image_buffer_type{}; + 
Id sampled_texture_buffer_type{}; + std::array cbufs{}; std::array ssbos{}; std::vector textures; + std::vector texture_buffers; Id workgroup_id{}; Id local_invocation_id{}; @@ -151,6 +155,7 @@ private: void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); + void DefineTextureBuffers(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 191380db0..32512a0e5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -249,6 +249,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); + ctx.AddCapability(spv::Capability::SampledBuffer); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fc40615af..525f67c6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -128,12 +128,18 @@ Id Texture(EmitContext& ctx, const IR::Value& index) { throw NotImplementedException("Indirect texture sample"); } -Id TextureImage(EmitContext& ctx, const IR::Value& index) { - if (index.IsImmediate()) { +Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect texture sample"); + } + if (info.type == TextureType::Buffer) { + const Id sampler_id{ctx.texture_buffers.at(index.U32())}; + const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; + return ctx.OpImage(ctx.image_buffer_type, id); + } else { const TextureDefinition def{ctx.textures.at(index.U32())}; return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } - throw NotImplementedException("Indirect texture sample"); } Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { @@ -297,17 +303,22 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#pragma optimize("", off) + Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags()}; + if (info.type == TextureType::Buffer) { + lod = Id{}; + } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, index), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { const auto info{inst->Flags()}; - const Id image{TextureImage(ctx, index)}; + const Id image{TextureImage(ctx, index, info)}; const Id zero{ctx.u32_zero_value}; const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { @@ -331,6 +342,9 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i case TextureType::ShadowArrayCube: return 
ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), mips()); + case TextureType::Buffer: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, + zero, mips()); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index bcb94ce4d..290ce4179 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -147,24 +147,39 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + explicit Descriptors(TextureDescriptors& texture_descriptors_, + TextureBufferDescriptors& texture_buffer_descriptors_) + : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ + texture_buffer_descriptors_} {} - u32 Add(const TextureDescriptor& descriptor) { - // TODO: Handle arrays - auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { - return descriptor.cbuf_index == existing.cbuf_index && - descriptor.cbuf_offset == existing.cbuf_offset && - descriptor.type == existing.type; - })}; - if (it != descriptors.end()) { - return static_cast(std::distance(descriptors.begin(), it)); - } - descriptors.push_back(descriptor); - return static_cast(descriptors.size()) - 1; + u32 Add(const TextureDescriptor& desc) { + return Add(texture_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + }); + } + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); } private: - TextureDescriptors& descriptors; + template + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast(descriptors.size()) - 1; + } + + TextureDescriptors& texture_descriptors; + TextureBufferDescriptors& texture_buffer_descriptors; }; } // Anonymous namespace @@ -185,7 +200,10 @@ void TexturePass(Environment& env, IR::Program& program) { std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { return lhs.cbuf.index < rhs.cbuf.index; }); - Descriptors descriptors{program.info.texture_descriptors}; + Descriptors descriptors{ + program.info.texture_descriptors, + program.info.texture_buffer_descriptors, + }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; @@ -193,16 +211,42 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + switch (inst->Opcode()) { + case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); + break; + case IR::Opcode::ImageFetch: + if (flags.type != TextureType::Color1D) { + break; + } + if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + // 
Replace with the bound texture type only when it's a texture buffer + // If the instruction is 1D and the bound type is 2D, don't change the code and let + // the rasterizer robustness handle it + // This happens on Fire Emblem: Three Houses + flags.type.Assign(TextureType::Buffer); + } + inst->SetFlags(flags); + break; + default: + break; + } + u32 index; + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); } - const u32 index{descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, - })}; inst->SetArg(0, IR::Value{index}); } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 41bb5b9a1..e6f0de8d8 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -29,6 +29,7 @@ enum class TextureType : u32 { Shadow3D, ShadowCube, ShadowArrayCube, + Buffer, }; enum class Interpolation { @@ -50,6 +51,13 @@ struct TextureDescriptor { }; using TextureDescriptors = boost::container::small_vector; +struct TextureBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureBufferDescriptors = boost::container::small_vector; + struct ConstantBufferDescriptor { u32 index; u32 count; @@ -112,6 +120,7 @@ struct Info { constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; TextureDescriptors texture_descriptors; + TextureBufferDescriptors texture_buffer_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index eebe5d569..decf0d32c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -93,6 +93,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } } private: @@ -146,6 +149,8 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { case Shader::TextureType::ColorArrayCube: case Shader::TextureType::ShadowArrayCube: return VideoCommon::ImageViewType::CubeArray; + case Shader::TextureType::Buffer: + break; } UNREACHABLE_MSG("Invalid texture type {}", type); return {}; @@ -161,6 +166,11 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samp update_descriptor_queue.AddSampledImage(vk_image_view, sampler); ++index; } + for (const auto& desc : info.texture_buffer_descriptors) { + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + ++index; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0bb5b852d..9922cbd0f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -93,20 +93,23 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; 
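// A minimal sketch (hedged; not upstream code verbatim) of the read_handle
// helper this hunk introduces: the texture and texture-buffer descriptor
// loops both need the raw handle word from the bound const buffer, so the
// read is hoisted into a shared lambda. The Read<u32> template argument is
// an assumption; the surrounding text shows Read with its angle brackets
// stripped.
//
//   const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) {
//       ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
//       const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset};
//       return TextureHandle(gpu_memory.Read<u32>(addr), via_header_index);
//   }};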
const bool via_header_index{launch_desc.linked_tsc}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a0ef0e98b..afdd8b371 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,19 +169,23 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25f592b8a..23bf84a92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -212,7 +212,7 @@ protected: case Tegra::Texture::TextureType::Texture2DArray: return Shader::TextureType::ColorArray2D; case Tegra::Texture::TextureType::Texture1DBuffer: - throw Shader::NotImplementedException("Texture buffer"); + return Shader::TextureType::Buffer; case Tegra::Texture::TextureType::TextureCubeArray: return Shader::TextureType::ColorArrayCube; default: From ef885522243ff966a0b25a35ad590862ff31e03a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 05:52:41 -0300 Subject: [PATCH 192/770] shader: Fix TextureGrad --- 
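Note (added annotation, not part of the upstream commit message): the one-line
fix below changes the derivative-unpacking loop counter in ImageOperands from
size_t to u32. num_derivates is a u32 and the OpCompositeExtract component
indices are 32-bit literals, so the i * 2 and i * 2 + 1 indices presumably
need a u32 induction variable to convert without narrowing. Post-patch shape
of the loop, for reference:

    for (u32 i = 0; i < num_derivates; ++i) {
        deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
        deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
    }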
src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 525f67c6e..f0f8db8c3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -77,7 +77,7 @@ public: } boost::container::static_vector deriv_x_accum; boost::container::static_vector deriv_y_accum; - for (size_t i = 0; i < num_derivates; i++) { + for (u32 i = 0; i < num_derivates; ++i) { deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); } From 56b92bd89cdf28f51277d6fc68115b2cd4b18864 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 05:53:38 -0300 Subject: [PATCH 193/770] shader: Fix F2I --- .../translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ef55b9c75..21ae92be1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -37,7 +37,7 @@ union F2I { BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; BitField<39, 2, Rounding> rounding; - BitField<49, 1, u64> half; + BitField<41, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; BitField<47, 1, u64> cc; From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: [PATCH 194/770] shader: Interact texture buffers with buffer cache --- .../backend/spirv/emit_context.cpp | 54 +++---- .../backend/spirv/emit_context.h | 2 +- src/shader_recompiler/shader_info.h | 2 +- src/video_core/buffer_cache/buffer_cache.h | 138 ++++++++++++++++++ .../renderer_opengl/gl_buffer_cache.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 4 + .../renderer_opengl/gl_texture_cache.h | 2 + .../renderer_vulkan/pipeline_helper.h | 26 ++-- .../renderer_vulkan/vk_buffer_cache.cpp | 57 ++++++-- .../renderer_vulkan/vk_buffer_cache.h | 18 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 30 ++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++- .../renderer_vulkan/vk_texture_cache.cpp | 63 ++------ .../renderer_vulkan/vk_texture_cache.h | 30 ++-- .../texture_cache/image_view_base.cpp | 9 ++ .../texture_cache/image_view_base.h | 1 + src/video_core/texture_cache/texture_cache.h | 13 +- 17 files changed, 334 insertions(+), 149 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d01633628..b738e00cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -130,8 +130,8 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineSharedMemory(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); - DefineTextures(program.info, binding); DefineTextureBuffers(program.info, binding); + DefineTextures(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -516,6 
+516,32 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + texture_buffers.insert(texture_buffers.end(), desc.count, id); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { @@ -544,32 +570,6 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } -void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { - if (info.texture_buffer_descriptors.empty()) { - return; - } - const spv::ImageFormat format{spv::ImageFormat::Unknown}; - image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); - sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); - - const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; - texture_buffers.reserve(info.texture_buffer_descriptors.size()); - for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of texture buffers"); - } - const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 2a10e94e5..f1ac4430c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -154,8 +154,8 @@ private: void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); - void DefineTextures(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/shader_info.h 
b/src/shader_recompiler/shader_info.h index e6f0de8d8..4cc731198 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -119,8 +119,8 @@ struct Info { boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; - TextureDescriptors texture_descriptors; TextureBufferDescriptors texture_buffer_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..6701aab82 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,11 +43,14 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; using namespace Common::Literals; @@ -66,6 +70,7 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +101,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -142,11 +151,21 @@ public: void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -254,12 +273,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +297,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +307,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr 
cpu_addr, u32 size); @@ -323,6 +350,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span ImmediateBuffer(size_t wanted_capacity); @@ -347,10 +377,12 @@ private: std::array vertex_buffers; std::array, NUM_STAGES> uniform_buffers; std::array, NUM_STAGES> storage_buffers; + std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; std::array compute_uniform_buffers; std::array compute_storage_buffers; + std::array compute_texture_buffers; std::array enabled_uniform_buffers{}; u32 enabled_compute_uniform_buffers = 0; @@ -360,6 +392,9 @@ private: u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; + std::array enabled_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + std::array fast_bound_uniform_buffers{}; std::array uniform_cache_hits{}; @@ -619,6 +654,7 @@ void BufferCache
<P>
::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template @@ -626,6 +662,7 @@ void BufferCache
<P>
::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template @@ -660,6 +697,18 @@ void BufferCache
<P>
::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache
<P>
::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; +} + +template +void BufferCache
<P>
::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache
<P>
::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; @@ -680,6 +729,18 @@ void BufferCache
<P>
::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache
<P>
::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; +} + +template +void BufferCache
<P>
::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format) { + enabled_compute_texture_buffers |= 1U << tbo_index; + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache
<P>
::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -988,6 +1049,26 @@ void BufferCache
<P>
::BindHostGraphicsStorageBuffers(size_t stage) { }); } +template +void BufferCache
<P>
::BindHostGraphicsTextureBuffers(size_t stage) { + u32 binding_index = 0; + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache
<P>
::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1050,6 +1131,26 @@ void BufferCache
<P>
::BindHostComputeStorageBuffers() { }); } +template +void BufferCache
<P>
::BindHostComputeTextureBuffers() { + u32 binding_index = 0; + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache
<P>
::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { @@ -1060,6 +1161,7 @@ void BufferCache
<P>
::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1067,6 +1169,7 @@ template void BufferCache
<P>
::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template @@ -1166,6 +1269,14 @@ void BufferCache
<P>
::UpdateStorageBuffers(size_t stage) { }); } +template +void BufferCache
<P>
::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache
<P>
::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1227,6 +1338,14 @@ void BufferCache
<P>
::UpdateComputeStorageBuffers() { }); } +template +void BufferCache
<P>
::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache
<P>
::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { Buffer& buffer = slot_buffers[buffer_id]; @@ -1581,6 +1700,25 @@ typename BufferCache
<P>
::Binding BufferCache
<P>
::StorageBufferBinding(GPUVAddr s return binding; } +template +typename BufferCache
<P>
::TextureBufferBinding BufferCache
<P>
::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + template std::span BufferCache
<P>
::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..ddcce5e97 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -155,6 +155,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..a8bf84218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1016,6 +1016,10 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..817b0e650 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -182,6 +182,8 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index decf0d32c..cff93cc60 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,7 +24,8 @@ struct TextureHandle { [[likely]] if (via_header_index) { image = data; sampler = data; - } else { + } + else { const Tegra::Texture::TextureHandle handle{data}; image = handle.tic_id; sampler = via_header_index ? 
image : handle.tsc_id.Value(); @@ -90,12 +91,12 @@ public: for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); } + for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } } private: @@ -156,20 +157,15 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { return {}; } -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, - const ImageId* image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + image_view_ids += info.texture_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{samplers[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - ++index; - } - for (const auto& desc : info.texture_buffer_descriptors) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - ++index; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..cdda56ab1 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -67,25 +67,50 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); + : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, + buffer{device->GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = SizeBytes(), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + 
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + })}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..ea17406dc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector views; }; class BufferCacheRuntime { @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -123,6 +140,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9922cbd0f..ac47b1f3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -80,8 +80,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& 
kepler_compute, desc.is_written); ++ssbo_index; } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); texture_cache.SynchronizeComputeDescriptors(); @@ -99,6 +97,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -106,16 +108,26 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t image_index{}; - PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - update_descriptor_queue, image_index); + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), + image_view.format); + ++texture_buffer_ids; + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index afdd8b371..893258b4a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,6 +175,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -182,24 +186,37 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - 
buffer_cache.UpdateGraphicsBuffers(is_indexed); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + ImageId* texture_buffer_index{image_view_ids.data()}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format); + ++index; + ++texture_buffer_index; + } + texture_buffer_index += info.texture_descriptors.size(); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); - size_t index{}; + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - texture_cache, update_descriptor_queue, index); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); } texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1bbc542a1..e42b091c5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,10 +15,10 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -162,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -813,13 +794,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, 
const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -828,11 +805,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { - buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -884,19 +857,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -1032,19 +992,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 189ee5a68..498e76a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -41,9 +41,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, 
ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -54,7 +54,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -92,8 +92,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, std::span copies); @@ -101,10 +99,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -121,7 +115,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector storage_image_views; @@ -132,6 +125,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); @@ -142,10 +137,6 @@ public: return *image_views[static_cast(query_type)]; } - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; - } - [[nodiscard]] VkImage ImageHandle() const noexcept { return image_handle; } @@ -162,6 +153,14 @@ public: return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); @@ -169,11 +168,12 @@ private: std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit 
ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..5e8d99482 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -968,9 +968,6 @@ void TextureCache
<P>
::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +990,12 @@ ImageViewId TextureCache
<P>
::FindImageView(const TICEntry& config) { template <class P> ImageViewId TextureCache
<P>
::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1803,9 @@ void TextureCache
<P>
::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } From 233e39bb7b9ca7660c7a63a386e285aa5524bd20 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Apr 2021 16:48:39 -0300 Subject: [PATCH 195/770] shader: Fix dangling labels --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1a4ee4f6c..847bb1986 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -518,6 +518,11 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } return &*it; } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, From 5cd3d00167b17c1fe36f97da978a7024e93c14e7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 23:41:27 -0300 Subject: [PATCH 196/770] shader: Fix FCMP immediate variant --- .../maxwell/translate/impl/floating_point_compare.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index e78e9c4e1..c02a40209 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -42,7 +42,15 @@ void TranslatorVisitor::FCMP_cr(u64 insn) { } void TranslatorVisitor::FCMP_imm(u64 insn) { - FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; + const u32 value{static_cast(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn)); } } // namespace Shader::Maxwell From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: [PATCH 197/770] shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. 
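For readers unfamiliar with that warning class, here is a minimal sketch of the
braced-scalar-init cleanup (illustrative only, not code from this series;
FpControl below is a simplified stand-in for the real IR struct):

    #include <cstdio>

    struct FpControl {
        bool no_contraction{};
        int rounding{};
    };

    int main() {
        // Before: FpControl control{.no_contraction{true}, .rounding{1}};
        // The braces around each scalar member initializer are what the
        // compilers complain about, so the patch rewrites every such
        // designated initializer to the '=' form:
        const FpControl control{.no_contraction = true, .rounding = 1};
        std::printf("%d %d\n", control.no_contraction ? 1 : 0, control.rounding);
        return 0;
    }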
Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- src/common/thread_worker.h | 1 + src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 +- .../backend/spirv/emit_spirv.cpp | 19 ++- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_warp.cpp | 2 +- src/shader_recompiler/file_environment.h | 2 +- .../frontend/ir/attribute.cpp | 4 +- .../frontend/ir/basic_block.cpp | 2 +- .../frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- .../frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 ++++++++---------- .../frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../maxwell/structured_control_flow.cpp | 3 +- .../maxwell/translate/impl/double_add.cpp | 6 +- .../impl/double_fused_multiply_add.cpp | 6 +- .../translate/impl/double_multiply.cpp | 6 +- .../translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- ...oating_point_conversion_floating_point.cpp | 6 +- .../floating_point_conversion_integer.cpp | 11 +- .../floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../impl/floating_point_swizzled_add.cpp | 6 +- .../impl/half_floating_point_add.cpp | 11 +- ...half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../impl/half_floating_point_set.cpp | 11 +- .../half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../impl/load_store_local_shared.cpp | 9 +- .../translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../translate/impl/video_set_predicate.cpp | 1 - .../ir_opt/collect_shader_info_pass.cpp | 20 +-- .../ir_opt/constant_propagation_pass.cpp | 49 +++--- .../global_memory_to_storage_buffer_pass.cpp | 42 +++--- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- .../ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 ++-- .../ir_opt/verification_pass.cpp | 4 +- src/tests/common/unique_function.cpp | 2 + src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 - .../renderer_vulkan/vk_texture_cache.cpp | 2 +- 66 files changed, 308 insertions(+), 313 deletions(-) diff --git 
a/src/common/thread_worker.h b/src/common/thread_worker.h index 0a975a869..cd0017726 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 22639fe13..551bf1c58 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -196,6 +196,8 @@ else() $<$:-Werror=unused-but-set-parameter> $<$:-Werror=unused-but-set-variable> -Werror=unused-variable + + $<$:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b738e00cc..0c114402b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie const std::string_view def_name_view( def_name.data(), fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + defs[static_cast(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 32512a0e5..355cf0ca8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -16,7 +16,7 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits {}; template struct FuncTraits { @@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { template void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { using Traits = FuncTraits; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - func(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } @@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) 
\ case IR::Opcode::name: \ return Invoke<&Emit##name>(ctx, inst); #include "shader_recompiler/frontend/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->Opcode()); + throw LogicError("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f0f8db8c3..815ca6299 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -43,11 +43,13 @@ public: // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); return; } - const IR::Opcode opcode{values[0]->Opcode()}; - if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + auto read{[&](unsigned int a, unsigned int b) { + return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); + }}; const Id offsets{ ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), @@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { - const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#ifdef _WIN32 #pragma optimize("", off) +#endif Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c57bd291d..12a03ed6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; + [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 17640a622..71601f8fd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -7,7 +7,7 @@ namespace Shader { -class FileEnvironment final : public Environment { +class FileEnvironment : public Environment { public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea..7993e5c43 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast(attribute) 
- static_cast(Attribute::Generic0X)) / 4; + return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("", static_cast(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6..e1f0191f4 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2b..fc18ea2a2 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf..aa8597c60 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast(flow_test); } - [[nodiscard]] std::pair Pred() const noexcept { + [[nodiscard]] std::pair GetPred() const noexcept { return {static_cast(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c..a2104bdb3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94..ceb44e604 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid 
pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if (inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674..97dc91d85 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9..002dbf94e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f..89a17fb1b 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
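// Editor's note, not part of the patch: '#pragma once' is an include-guard
// directive that belongs in headers; in a translation unit such as program.cpp
// it is a stray no-op (presumably copied from a header skeleton), which is why
// the hunk below deletes it.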
-#pragma once - #include #include diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487..1e7ffb86d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..a0962863d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb1986..cb8ec7eaa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + 
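// Editor's note, not part of the patch: begin and stack are saved into locals
// above before '*old_block = Block{}' wipes the object, then restored member
// by member below; this assignment style replaces the previous big
// designated-initializer blocks, per the commit message's "Don't use list
// initializers in control flow" item.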
old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? 
static_cast(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e..932d19c1d 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if 
((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d..008625cb3 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e..02cef2645 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea..5a1b3a8fc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff7321862..723841496 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& 
src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95..4a49299a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c84..b8c89810c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209..80109ca0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e..b9f4ee0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde0..035f8782a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1..cf3cf1ba6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c..fa2a7807b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032..8ae437528 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae..06226b7ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db843..5f93a1513 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a21..7550a8d4c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? 
IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047..f2738a93b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hadd2.low << 6) | static_cast((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | static_cast((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6a..fd7986701 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hfma2.low << 6) | static_cast((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | static_cast((hfma2.neg_high != 0 ? 
1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef6..3f548ce76 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hmul2.low << 6) | static_cast((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | static_cast((hmul2.neg_high != 0 ? 1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c..cca5b831f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hset2.low << 6) | static_cast((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | static_cast((hset2.neg_high != 0 ? 
1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92..b3931dae3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hsetp2.low << 6) | + static_cast((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | + static_cast((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4..88bbac0a5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast(imm.is_negative != 0 ? 
(1ULL << 31) : 0)}; const u32 value{static_cast(imm.value) << 12}; return ir.Imm32(Common::BitCast(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e1815..8ffd84867 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2..5a0fc36a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea32..2300088e3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea..e24b49721 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + 
element, IR::U32{ir.CompositeExtract(value, static_cast(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d7..36c5cff2f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9..95d416586 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? 
IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f4..fe2c7db85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad..2ba9c1018 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23..0863bdfcd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bca..0459e5473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fcc..ec5e74f6d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 
0U : static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82a..edbfcd308 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a09..61fbbe04c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const 
rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. 
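// A minimal sketch of the pattern this fold targets, with assumed IR value names:
//   %a = GetCbufU32 binding, offset
//   %b = GetCbufU32 binding, offset   // same binding and offset as %a
//   %r = ISub32 %a, %b                // folds to the immediate 0
// Two reads of the same constant-buffer word cannot differ within a single
// invocation, so the subtraction can be replaced with zero outright.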
const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; - return IR::Value{func(Arg>(inst.Arg(I))...)}; + return IR::Value{func(Arg>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == 
IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if (inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bdd..90a65dd16 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { // This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. 
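// A minimal sketch of the canonical chain this helper walks, with assumed IR
// value names for illustration:
//   %lo   = GetCbufU32 binding, offset        // low 32 bits of the pointer
//   %hi   = GetCbufU32 binding, offset + 4    // high 32 bits of the pointer
//   %vec  = CompositeConstructU32x2 %lo, %hi
//   %addr = PackUint2x32 %vec
//   %gmem = IAdd64 %addr, imm                 // optional immediate offset
// Peeling the optional IAdd64 and then the PackUint2x32 yields %lo together
// with the accumulated immediate offset returned in LowAddrInfo.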
@@ -200,7 +200,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch 
(inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f2..38af72dfe 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed6..52576b07f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d1..346fcc377 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce4179..c8aee3d3d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return 
IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cc..dbec96d84 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if (!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied 
constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 71b07c194..3166a69dc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4a..57e2d569c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92..fcebb8f6e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 
cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab..991afe521 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5..70328680d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] std::vector TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); std::ranges::transform( From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: [PATCH 198/770] shader: Address feedback + clang format --- src/shader_recompiler/CMakeLists.txt | 2 ++ .../backend/spirv/emit_spirv_image.cpp | 4 ---- .../backend/spirv/emit_spirv_warp.cpp | 1 - src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- .../frontend/maxwell/control_flow.cpp | 4 ++-- .../frontend/maxwell/control_flow.h | 16 ++++++++-------- .../frontend/maxwell/decode.cpp | 2 +- .../maxwell/translate/impl/common_funcs.cpp | 5 +++-- .../maxwell/translate/impl/not_implemented.cpp | 1 - .../global_memory_to_storage_buffer_pass.cpp | 3 ++- src/shader_recompiler/object_pool.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.h | 4 ++-- 12 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 551bf1c58..6b5df23e2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -197,6 +197,8 @@ else() $<$:-Werror=unused-but-set-variable> -Werror=unused-variable + # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. + # And this in turn limits the size of a std::array.
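# A concrete reading, assuming Clang's documented default of -fbracket-depth=256:
# each element of an expanded fold expression counts against the bracket depth,
# so initializing a std::array from the several-hundred-entry opcode tables
# overflows the default limit, and Clang builds raise it to 1024 below.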
$<$:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 815ca6299..6a89c0f79 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -304,10 +304,6 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } -#ifdef _WIN32 -#pragma optimize("", off) -#endif - Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags()}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 12a03ed6e..f6196653a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,6 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 002dbf94e..7d3e0b2ab 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type = type_token, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index cb8ec7eaa..9811183f1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -44,7 +44,7 @@ void Split(Block* old_block, Block* new_block, Location pc) { *new_block = Block{}; new_block->begin = pc; new_block->end = old_block->end; - new_block->end_class = old_block->end_class, + new_block->end_class = old_block->end_class; new_block->cond = old_block->cond; new_block->stack = old_block->stack; new_block->branch_true = old_block->branch_true; @@ -428,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += static_cast(brx_table->branch_offset); + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 9f570fbb5..89966b16a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -78,15 +78,15 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; - EndClass end_class; - IR::Condition cond; + EndClass end_class{}; + IR::Condition cond{}; Stack stack; - Block* branch_true; - Block* branch_false; - FunctionId function_call; - Block* return_block; - IR::Reg branch_reg; - s32 branch_offset; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; std::vector indirect_branches; }; diff 
--git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 932d19c1d..972f677dc 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode = Opcode::name, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index d30e82b10..10bb01d99 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,8 +72,9 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, - FPCompareOp compare_op, IR::FpControl control) { +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { case FPCompareOp::F: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba0cfa673..c23901052 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,7 +65,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 90a65dd16..afe871505 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -164,7 +164,8 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst.GetOpcode()); } } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 424281634..f8b255b66 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template - requires std::is_constructible_v [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v[[nodiscard]] T* Create(Args&&... 
args) { return std::construct_at(Memory(), std::forward(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 104e6cc85..8efdc2926 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include #include "common/common_types.h" #include "common/thread_worker.h" From 094da34456bbf56353211b47fcb227c09637aa15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:44:05 -0300 Subject: [PATCH 199/770] shader: Fix Windows build issues --- .../translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index cf3cf1ba6..3cb896950 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -193,7 +193,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; } } From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: [PATCH 200/770] shader: Implement SULD and SUST --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 97 ++++-- .../backend/spirv/emit_context.h | 9 +- .../backend/spirv/emit_spirv.cpp | 1 + .../backend/spirv/emit_spirv.h | 6 + .../backend/spirv/emit_spirv_image.cpp | 46 ++- .../frontend/ir/ir_emitter.cpp | 11 + .../frontend/ir/ir_emitter.h | 7 +- src/shader_recompiler/frontend/ir/modifiers.h | 12 +- src/shader_recompiler/frontend/ir/opcodes.inc | 6 + .../translate/impl/not_implemented.cpp | 8 - .../translate/impl/surface_load_store.cpp | 280 ++++++++++++++++++ .../maxwell/translate/impl/texture_fetch.cpp | 19 +- .../translate/impl/texture_fetch_swizzled.cpp | 12 +- .../maxwell/translate/impl/texture_gather.cpp | 19 +- .../impl/texture_gather_swizzled.cpp | 3 +- .../translate/impl/texture_gradient.cpp | 18 +- .../maxwell/translate/impl/texture_load.cpp | 18 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../translate/impl/texture_mipmap_level.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 91 ++++-- src/shader_recompiler/shader_info.h | 61 ++-- src/video_core/renderer_vulkan/blit_image.cpp | 4 +- .../renderer_vulkan/pipeline_helper.h | 43 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + .../renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 112 +++++-- .../renderer_vulkan/vk_texture_cache.h | 23 +- src/video_core/texture_cache/texture_cache.h | 8 + 31 files changed, 739 insertions(+), 209 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6b5df23e2..8e1d37373 100644 --- 
a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/surface_load_store.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0c114402b..32f8c4508 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -18,41 +18,70 @@ namespace { Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; switch (desc.type) { case TextureType::Color1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); case TextureType::ColorArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); case TextureType::Color3D: - return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); case TextureType::ColorCube: - return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); case TextureType::ColorArrayCube: - return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); - case TextureType::Shadow1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format); - case TextureType::ShadowArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); - case TextureType::Shadow2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); - case TextureType::ShadowArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); - case TextureType::Shadow3D: - return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); - case TextureType::ShadowCube: - return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); - case TextureType::ShadowArrayCube: - return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); case TextureType::Buffer: break; } throw InvalidArgument("Invalid texture type {}", desc.type); } +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { + const spv::ImageFormat format{[&] { + switch (desc.format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case 
ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", desc.format); + }()}; + const Id type{ctx.U32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + case TextureType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin, spv::StorageClass storage_class) { const Id pointer_type{ctx.TypePointer(storage_class, type)}; @@ -134,6 +163,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); DefineTextures(program.info, binding); + DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -572,6 +602,31 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } +void EmitContext::DefineImages(const Info& info, u32& binding) { + images.reserve(info.image_descriptors.size()); + for (const ImageDescriptor& desc : info.image_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of textures"); + } + const Id image_type{ImageType(*this, desc)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("img{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + for (u32 index = 0; index < desc.count; ++index) { + images.push_back(ImageDefinition{ + .id{id}, + .image_type{image_type}, + }); + } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index f1ac4430c..e70f3458c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -35,6 +35,11 @@ struct TextureDefinition { Id image_type; }; +struct ImageDefinition { + Id id; + Id image_type; +}; + struct UniformDefinitions { Id U8{}; Id S8{}; @@ -95,8 +100,9 @@ public: std::array cbufs{}; std::array ssbos{}; - std::vector textures; std::vector texture_buffers; + std::vector textures; + std::vector images; Id workgroup_id{}; Id local_invocation_id{}; @@ -156,6 +162,7 @@ private: void DefineStorageBuffers(const Info& info, u32& binding); void 
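// ----- Editor's note (sketch, not part of the patch) -----
// ImageType() above initializes `format` with an immediately-invoked lambda
// so the result of a switch can still be a const local; the throw at the
// lambda's end doubles as the "no case matched" path. Standalone form of
// the idiom, with stand-in enums:
#include <stdexcept>

enum class In { A, B };
enum class Out { X, Y };

inline Out Pick(In in) {
    const Out out{[&] {
        switch (in) {
        case In::A:
            return Out::X;
        case In::B:
            return Out::Y;
        }
        throw std::invalid_argument("unmatched case"); // mirrors InvalidArgument
    }()};
    return out;
}
// ----- end note -----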
DefineTextureBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); + void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 355cf0ca8..ecd0fac5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -253,6 +253,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::SampledBuffer); + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 08460c94e..a39b16f1e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -369,6 +369,8 @@ Id EmitBindlessImageFetch(EmitContext&); Id EmitBindlessImageQueryDimensions(EmitContext&); Id EmitBindlessImageQueryLod(EmitContext&); Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -379,6 +381,8 @@ Id EmitBoundImageFetch(EmitContext&); Id EmitBoundImageQueryDimensions(EmitContext&); Id EmitBoundImageQueryLod(EmitContext&); Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -397,6 +401,8 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6a89c0f79..dd261fd47 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -144,6 +144,18 @@ Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo in } } +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + throw NotImplementedException("Image buffer"); + } else { + const ImageDefinition def{ctx.images.at(index.U32())}; + return ctx.OpLoad(def.image_type, def.id); + } +} + Id Decorate(EmitContext& ctx, IR::Inst* 
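// ----- Editor's note (sketch, not part of the patch) -----
// The StorageImageReadWithoutFormat capability added above pairs with the
// ImageFormat::Typeless path: SPIR-V only permits OpImageRead from an image
// declared with format Unknown when that capability is enabled, while sized
// formats (the typed SULD.D path) need no extra capability. Decision shape,
// with a stand-in enum:
enum class ImgFormat { Typeless, R32_UINT /*, ...*/ };

inline bool NeedsReadWithoutFormat(ImgFormat format) {
    // Typeless descriptors become spv::ImageFormat::Unknown
    return format == ImgFormat::Typeless;
}
// ----- end note -----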
inst, Id sample) { const auto info{inst->Flags()}; if (info.relaxed_precision != 0) { @@ -209,6 +221,14 @@ Id EmitBindlessImageGradient(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -249,6 +269,14 @@ Id EmitBoundImageGradient(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -322,23 +350,16 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { case TextureType::Color1D: - case TextureType::Shadow1D: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), zero, zero, mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: - case TextureType::ShadowArray1D: - case TextureType::Shadow2D: - case TextureType::ShadowCube: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), zero, mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - case TextureType::ShadowArray2D: - case TextureType::Shadow3D: - case TextureType::ShadowArrayCube: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), mips()); case TextureType::Buffer: @@ -365,4 +386,15 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I coords, operands.Mask(), operands.Span()); } +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags()}; + return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], + Image(ctx, index, info), coords, std::nullopt, std::span{}); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { + const auto info{inst->Flags()}; + ctx.OpImageWrite(Image(ctx, index, info), coords, color); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index a2104bdb3..17be0c639 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1620,6 +1620,17 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const V return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); } +Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; + return Inst(op, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
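// ----- Editor's note (sketch, not part of the patch) -----
// Every image operation exists in three opcode tiers: Bindless* (handle in
// a register), Bound* (handle is a constant-buffer offset) and the indexed
// form (Image*). The texture pass rewrites the first two into the third,
// so the EmitBindless*/EmitBound* stubs above throwing LogicError is the
// intended "this never reaches the backend" contract. Sketch:
#include <stdexcept>

enum class Op { BindlessImageRead, BoundImageRead, ImageRead };

inline Op Lowered(Op op) {
    switch (op) {
    case Op::BindlessImageRead:
    case Op::BoundImageRead:
        return Op::ImageRead; // substituted by the texture pass
    case Op::ImageRead:
        return op;            // already indexed; the backend can emit it
    }
    throw std::logic_error("unreachable");
}
// ----- end note -----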
Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; + Inst(op, Flags{info}, handle, coords, color); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2cab1dc5d..ec60070ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -265,20 +265,19 @@ public: [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); - [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); - [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); - [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivates, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); + [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 461671326..447e9703c 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -43,11 +43,13 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; - BitField<8, 1, u32> has_bias; - BitField<9, 1, u32> has_lod_clamp; - BitField<10, 1, u32> relaxed_precision; - BitField<11, 2, u32> gather_component; - BitField<13, 2, u32> num_derivates; + BitField<8, 1, u32> is_depth; + BitField<9, 1, u32> has_bias; + BitField<10, 1, u32> has_lod_clamp; + BitField<11, 1, u32> relaxed_precision; + BitField<12, 2, u32> gather_component; + BitField<14, 2, u32> num_derivates; + BitField<16, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1697de965..82c5b37ba 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -389,6 +389,8 @@ OPCODE(BindlessImageFetch, F32x4, U32, OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) +OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -400,6 +402,8 @@ OPCODE(BoundImageFetch, F32x4, U32, OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageRead, U32x4, U32, Opaque, ) +OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) 
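// ----- Editor's note (sketch, not part of the patch) -----
// The widened TextureInstInfo above still fits one u32: 8 (type) + 1
// (is_depth) + 1 (has_bias) + 1 (has_lod_clamp) + 1 (relaxed_precision)
// + 2 (gather_component) + 2 (num_derivates) + 3 (image_format) = 19 bits,
// which the static_assert guards. Equivalent packing with plain shifts,
// shown for the image_format field at bits [16, 19):
#include <cstdint>

struct PackedInfo {
    std::uint32_t raw{};

    void SetImageFormat(std::uint32_t fmt) {
        raw = (raw & ~(0x7u << 16)) | ((fmt & 0x7u) << 16);
    }
    std::uint32_t ImageFormat() const {
        return (raw >> 16) & 0x7u; // 3 bits cover all 8 ImageFormat values
    }
};
static_assert(sizeof(PackedInfo) == sizeof(std::uint32_t));
// ----- end note -----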
OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -411,6 +415,8 @@ OPCODE(ImageFetch, F32x4, U32, OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, U32, Opaque, ) +OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c23901052..327941223 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -281,18 +281,10 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SULD(u64) { - ThrowNotImplemented(Opcode::SULD); -} - void TranslatorVisitor::SURED(u64) { ThrowNotImplemented(Opcode::SURED); } -void TranslatorVisitor::SUST(u64) { - ThrowNotImplemented(Opcode::SUST); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..9a2d16a6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,280 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +enum class LoadCache : u64 { + Default, + CG, + CI, + CV, +}; + +enum class StoreCache : u64 { + Default, + CG, + CS, + WT, +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + 
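// ----- Editor's note (sketch, not part of the patch) -----
// The MASK table in surface_load_store.cpp above maps the 4-bit SULD.P /
// SUST.P swizzle immediate to an RGBA component mask; numerically each
// entry equals its own index, the table just spells out the R|G|B|A
// composition. The register demand of an untyped access then falls out of
// a popcount:
#include <bit>
#include <cstdint>

constexpr unsigned ComponentMask(std::uint64_t swizzle) {
    return static_cast<unsigned>(swizzle & 0xf); // same value as MASK[swizzle]
}
constexpr int RegistersFor(unsigned mask) {
    return std::popcount(mask); // one register per enabled component
}
static_assert(RegistersFor(ComponentMask(0b1011)) == 3); // R, G and A
// ----- end note -----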
return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array colors; + for (int i = 0; i < num_regs; ++i) { + colors[i] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[i] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::Default) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if (is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 
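// ----- Editor's note (sketch, not part of the patch) -----
// SULD above (and SUST below) share the handle selection: bound accesses
// turn the 13-bit bound_offset immediate into a byte offset (hence * 4)
// and keep it as a compile-time constant, while bindless accesses read the
// handle from a register at runtime. Shape of that choice, with stand-ins
// for the Imm32/X emitter calls:
#include <cstdint>

struct Handle {
    bool is_immediate;
    std::uint32_t value;
};

inline Handle SurfaceHandle(bool is_bound, std::uint64_t bound_offset,
                            std::uint32_t bindless_reg_value) {
    if (is_bound) {
        // Immediate constant-buffer byte offset, resolved at compile time
        return {true, static_cast<std::uint32_t>(bound_offset * 4)};
    }
    // Runtime register value; the texture pass tracks it back to a cbuf
    return {false, bindless_reg_value};
}
// ----- end note -----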
4 : bits)) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::Default) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 95d416586..9671d115e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -33,24 +33,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? 
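// ----- Editor's note (sketch, not part of the patch) -----
// The `bits == 3 ? 4 : bits` in SULD's untyped path above encodes the
// alignment rule for register groups: a group of N consecutive registers
// must start at an index aligned to N, and three-component groups align
// as if they were four. Assumed behaviour of IR::IsAligned:
constexpr bool IsAligned(int reg_index, int alignment) {
    return reg_index % alignment == 0;
}
static_assert(IsAligned(4, 4));  // R4 can anchor a 3- or 4-wide group
static_assert(!IsAligned(6, 4)); // R6 cannot
// ----- end note -----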
Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -169,7 +169,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); info.has_lod_clamp.Assign(lc ? 1 : 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index fe2c7db85..3500a4559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -95,18 +95,21 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { {}, info); case 4: // 2D.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, {}, {}, info); case 5: // 2D.LL.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b + 1), v.F(reg_b), {}, {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), zero, {}, {}, info); case 7: // ARRAY_2D @@ -124,7 +127,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::ShadowArray2D); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), v.F(reg_b + 1), zero, {}, {}, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index b2f9cda46..218cbc1a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -37,24 +37,24 @@ enum class ComponentType : u64 { A = 3, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? 
Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -163,7 +163,8 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); info.gather_component.Assign(static_cast(component_type)); const IR::Value sample{[&] { if (tld4.dc == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index 2ba9c1018..34efa2d50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -59,7 +59,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.relaxed_precision.Assign(1); } info.gather_component.Assign(static_cast(tld4s.component_type.Value())); - info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); IR::Value coords; if (tld4s.aoffi != 0) { CheckAlignment(reg_a, 2); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index c66468a48..c3fe3ffda 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? 
Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -152,7 +152,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; - info.type.Assign(GetType(txd.type, false)); + info.type.Assign(GetType(txd.type)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index 987b7ec34..983058303 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -137,7 +137,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { throw NotImplementedException("TLD.CL - CLAMP is not implmented"); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld.type, false)); + info.type.Assign(GetType(tld.type)); const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 0863bdfcd..5dd7e31b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
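// ----- Editor's note (sketch, not part of the patch) -----
// The GetType() rewrites repeated across these translators all apply one
// refactor: the Shadow* members are gone from Shader::TextureType, and
// depth comparison now travels separately in TextureInstInfo::is_depth.
// That halves every switch and lets color and depth-compare accesses share
// the same image types. Before/after in miniature:
enum class TexType { Color2D /* no Shadow2D anymore */ };

struct InstInfo {
    TexType type;
    bool is_depth;
};

inline InstInfo Classify(bool dc) {
    // before: info.type = dc ? Shadow2D : Color2D;
    return {TexType::Color2D, dc}; // after: one type plus a flag
}
// ----- end note -----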
-#include +#include #include "common/bit_field.h" #include "common/common_types.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index b6efc04f0..2277d24ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -97,7 +97,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tmml.type, false)); + info.type.Assign(GetType(tmml.type)); const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; IR::Reg dest_reg{tmml.dest_reg}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index edbfcd308..bc23b0211 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -416,8 +416,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || - type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index c8aee3d3d..a7b1fcfad 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -61,6 +61,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: case IR::Opcode::BindlessImageGradient: return IR::Opcode::ImageGradient; + case IR::Opcode::BoundImageRead: + case IR::Opcode::BindlessImageRead: + return 
IR::Opcode::ImageRead; + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BindlessImageWrite: + return IR::Opcode::ImageWrite; default: return IR::Opcode::Void; } @@ -78,6 +84,8 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageQueryDimensions: case IR::Opcode::BindlessImageQueryLod: case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BindlessImageRead: + case IR::Opcode::BindlessImageWrite: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -89,6 +97,8 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BoundImageGradient: + case IR::Opcode::BoundImageRead: + case IR::Opcode::BoundImageWrite: return false; default: throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); @@ -147,10 +157,18 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& texture_descriptors_, - TextureBufferDescriptors& texture_buffer_descriptors_) - : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ - texture_buffer_descriptors_} {} + explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + TextureDescriptors& texture_descriptors_, + ImageDescriptors& image_descriptors_) + : texture_buffer_descriptors{texture_buffer_descriptors_}, + texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { @@ -159,11 +177,14 @@ public: }); } - u32 Add(const TextureBufferDescriptor& desc) { - return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && + u32 Add(const ImageDescriptor& desc) { + const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.format == existing.format && + desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset; - }); + })}; + image_descriptors[index].is_written |= desc.is_written; + return index; } private: @@ -178,8 +199,9 @@ private: return static_cast(descriptors.size()) - 1; } - TextureDescriptors& texture_descriptors; TextureBufferDescriptors& texture_buffer_descriptors; + TextureDescriptors& texture_descriptors; + ImageDescriptors& image_descriptors; }; } // Anonymous namespace @@ -201,8 +223,9 @@ void TexturePass(Environment& env, IR::Program& program) { return lhs.cbuf.index < rhs.cbuf.index; }); Descriptors descriptors{ - program.info.texture_descriptors, program.info.texture_buffer_descriptors, + program.info.texture_descriptors, + program.info.image_descriptors, }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays @@ -233,19 +256,41 @@ void TexturePass(Environment& env, IR::Program& program) { break; } u32 index; - if (flags.type == TextureType::Buffer) { - index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); - } else { - index = descriptors.Add(TextureDescriptor{ - .type = flags.type, - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); + 
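// ----- Editor's note (sketch, not part of the patch) -----
// Descriptors::Add above deduplicates by linear search: if an equivalent
// descriptor already exists its index is reused, otherwise the descriptor
// is appended. For images the is_written flag is OR-merged afterwards, so
// a shader that both reads and writes the same surface still gets a single
// binding, marked as written. Generic shape of the helper:
#include <cstdint>
#include <vector>

template <typename Desc, typename Eq>
std::uint32_t AddDescriptor(std::vector<Desc>& list, const Desc& desc, Eq equivalent) {
    for (std::uint32_t i = 0; i < list.size(); ++i) {
        if (equivalent(list[i])) {
            return i; // reuse the existing binding slot
        }
    }
    list.push_back(desc);
    return static_cast<std::uint32_t>(list.size() - 1);
}
// ----- end note -----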
switch (inst->GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageWrite: { + const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; + if (flags.type == TextureType::Buffer) { + throw NotImplementedException("Image buffer"); + } else { + index = descriptors.Add(ImageDescriptor{ + .type = flags.type, + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; + } + default: + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type = flags.type, + .is_depth = flags.is_depth != 0, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; } inst->SetArg(0, IR::Value{index}); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4cc731198..253b6eacf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -22,15 +22,20 @@ enum class TextureType : u32 { Color3D, ColorCube, ColorArrayCube, - Shadow1D, - ShadowArray1D, - Shadow2D, - ShadowArray2D, - Shadow3D, - ShadowCube, - ShadowArrayCube, Buffer, }; +constexpr u32 NUM_TEXTURE_TYPES = 8; + +enum class ImageFormat : u32 { + Typeless, + R8_UINT, + R8_SINT, + R16_UINT, + R16_SINT, + R32_UINT, + R32G32_UINT, + R32G32B32A32_UINT, +}; enum class Interpolation { Smooth, @@ -43,21 +48,6 @@ struct InputVarying { bool used{false}; }; -struct TextureDescriptor { - TextureType type; - u32 cbuf_index; - u32 cbuf_offset; - u32 count; -}; -using TextureDescriptors = boost::container::small_vector; - -struct TextureBufferDescriptor { - u32 cbuf_index; - u32 cbuf_offset; - u32 count; -}; -using TextureBufferDescriptors = boost::container::small_vector; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -70,6 +60,32 @@ struct StorageBufferDescriptor { bool is_written; }; +struct TextureBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureBufferDescriptors = boost::container::small_vector; + +struct TextureDescriptor { + TextureType type; + bool is_depth; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureDescriptors = boost::container::small_vector; + +struct ImageDescriptor { + TextureType type; + ImageFormat format; + bool is_written; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using ImageDescriptors = boost::container::small_vector; + struct Info { static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{16}; @@ -121,6 +137,7 @@ struct Info { boost::container::static_vector storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; TextureDescriptors texture_descriptors; + ImageDescriptors image_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c0d5c7f4..39fe9289b 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -361,7 +361,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear 
? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); @@ -435,7 +435,7 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index cff93cc60..d2c3f11c1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,6 +97,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.image_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); + } } private: @@ -127,36 +130,6 @@ private: size_t offset{}; }; -inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - case Shader::TextureType::Buffer: - break; - } - UNREACHABLE_MSG("Invalid texture type {}", type); - return {}; -} - inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { @@ -164,9 +137,17 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); } + for (const auto& desc : info.image_descriptors) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp 
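// ----- Editor's note (sketch, not part of the patch) -----
// PushImageDescriptors above orders cache bookkeeping before descriptor
// writes: a written storage image is flagged via MarkModification first,
// so the texture cache knows the GPU will dirty it, and only then is its
// storage view pushed to the update queue. Shape of the loop, with
// illustrative types:
#include <cstddef>

template <typename Desc, typename Cache, typename Queue, typename Id>
void PushStorageImages(const Desc* descs, std::size_t count, const Id*& ids,
                       Cache& cache, Queue& queue) {
    for (std::size_t i = 0; i < count; ++i) {
        auto& view = cache.GetImageView(*(ids++));
        if (descs[i].is_written) {
            cache.MarkModification(view.image_id); // GPU write incoming
        }
        queue.AddImage(view.StorageView(descs[i].type, descs[i].format));
    }
}
// ----- end note -----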
b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ac47b1f3c..3d690f335 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -108,6 +108,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 57e2d569c..23c01f24e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -186,6 +186,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0292a1b94..2ba44330f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -494,7 +494,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 70328680d..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -215,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType 
ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -232,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -539,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -577,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == Fermi2D::Operation::SrcCopy); @@ -915,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span swizzle{ SwizzleSource::R, @@ -954,39 +1001,39 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + const auto create = [&](TextureType tex_type, std::optional num_layers) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast(view_type)] = std::move(handle); + image_views[static_cast(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - 
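// ----- Editor's note (sketch, not part of the patch) -----
// ImageViewType() and Format() in this file share a defensive pattern: an
// exhaustive switch, then UNREACHABLE_MSG plus a concrete fallback return,
// so release builds (with assertions compiled out) still produce a valid
// enum value instead of falling off the end of the function. Miniature
// version with stand-in types:
#include <cassert>

enum class ImgFormat { R32_UINT, Typeless };
enum class VkFmt { Undefined, R32Uint };

inline VkFmt ToVkFormat(ImgFormat format) {
    switch (format) {
    case ImgFormat::R32_UINT:
        return VkFmt::R32Uint;
    case ImgFormat::Typeless:
        return VkFmt::Undefined;
    }
    assert(false && "invalid format"); // stands in for UNREACHABLE_MSG
    return VkFmt::R32Uint;             // deterministic release fallback
}
// ----- end note -----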
create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); @@ -1009,7 +1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1017,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 498e76a1c..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -133,8 +134,11 @@ public: [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast(query_type)]; + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -145,10 +149,6 @@ public: return render_target; } - 
[[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } @@ -162,15 +162,20 @@ public: } private: - [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array signeds; + std::array unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array image_views; + std::array image_views; + std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5e8d99482..255b07cf8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -117,6 +117,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + /// Fill image_view_ids with the graphics images in indices void FillGraphicsImageViews(std::span indices, std::span image_view_ids); @@ -526,6 +529,11 @@ typename P::ImageView& TextureCache
<P>
::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } +template +void TextureCache
<P>
::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + template void TextureCache
<P>
::FillGraphicsImageViews(std::span indices, std::span image_view_ids) { From 8cea39b5a63b350f7c6334b91b9d7e2b30bd61bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:52:49 -0300 Subject: [PATCH 201/770] shader: Remove outdated comment in F2I --- .../translate/impl/floating_point_conversion_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 3cb896950..92b1ce015 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -142,10 +142,6 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } }()}; - - // TODO: Handle out of bounds conversions. - // For example converting F32 65537.0 to U16, the expected value is 0xffff, - const bool is_signed{f2i.is_signed != 0}; const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); From 1be6705408d1a3454146c705fae3dc55031e966e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 10 Apr 2021 00:29:12 +0200 Subject: [PATCH 202/770] shader: Implement CC for ISET, FSET, PSET, CSET, and DSET Throw when other instructions are missing CC. --- .../maxwell/translate/impl/bitfield_extract.cpp | 5 +++++ .../maxwell/translate/impl/bitfield_insert.cpp | 5 +++++ .../translate/impl/condition_code_set.cpp | 16 ++++++++++++++-- .../translate/impl/double_compare_and_set.cpp | 17 +++++++++++++++-- .../impl/double_fused_multiply_add.cpp | 5 +++++ .../maxwell/translate/impl/double_min_max.cpp | 5 +++++ .../maxwell/translate/impl/double_multiply.cpp | 5 +++++ .../impl/floating_point_compare_and_set.cpp | 17 +++++++++++++++-- ...floating_point_conversion_floating_point.cpp | 5 +++++ .../translate/impl/floating_point_min_max.cpp | 5 +++++ .../translate/impl/integer_compare_and_set.cpp | 17 ++++++++++++++--- .../impl/integer_floating_point_conversion.cpp | 2 +- .../translate/impl/integer_funnel_shift.cpp | 5 +++++ .../translate/impl/integer_minimum_maximum.cpp | 5 +++++ .../translate/impl/integer_shift_right.cpp | 4 ++++ .../translate/impl/load_effective_address.cpp | 10 +++++++++- .../impl/logic_operation_three_input.cpp | 5 +++++ .../translate/impl/predicate_set_register.cpp | 16 ++++++++++++++-- 18 files changed, 136 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 4a03e6939..0738ae7a6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -14,9 +14,14 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_reg; BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const bfe{insn}; + if (bfe.cc != 0) { + throw NotImplementedException("BFE CC"); + } + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index ee312c30d..fb7f821e6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -13,8 +13,13 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; } const bfi{insn}; + if (bfi.cc != 0) { + throw NotImplementedException("BFI CC"); + } + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp index ea0c40a54..420f2fb94 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -18,17 +18,29 @@ void TranslatorVisitor::CSET(u64 insn) { BitField<42, 1, u64> neg_bop_pred; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; } const cset{insn}; const IR::U32 one_mask{ir.Imm32(-1)}; const IR::U32 fp_one{ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; - const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } void TranslatorVisitor::CSETP(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp index e2ec852c9..1173192e4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -19,6 +19,7 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -37,10 +38,22 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{dset.bf == 0 ? 
one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index 723841496..f66097014 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -16,10 +16,15 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<49, 1, u64> neg_c; } const dfma{insn}; + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + const IR::F64 src_a{v.D(dfma.src_a_reg)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp index 55a224db3..6b551847c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -17,10 +17,15 @@ void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<42, 1, u64> neg_pred; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; } const dmnmx{insn}; + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 4a49299a0..c0159fb65 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -16,9 +16,14 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg; } const dmul{insn}; + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ .no_contraction = true, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index b9f4ee0d9..eece4f28f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -19,6 +19,7 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -43,10 +44,22 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 035f8782a..ce2cf470d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -41,6 +41,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { BitField<0, 8, IR::Reg> dest_reg; BitField<44, 1, u64> ftz; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<50, 1, u64> sat; BitField<39, 4, u64> rounding_op; BitField<39, 2, FpRounding> rounding; @@ -53,6 +54,10 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { } } const f2f{insn}; + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 8ae437528..c0d6ee5af 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -18,10 +18,15 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<44, 1, u64> ftz; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; } const fmnmx{insn}; + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 914af010f..a2cd8d7c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -19,6 +19,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<43, 1, u64> x; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const iset{insn}; @@ -38,12 +39,22 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; - - const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); + if (iset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 5a0fc36a0..3c233597f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -62,7 +62,7 @@ IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { const Encoding i2f{insn}; if (i2f.cc != 0) { - throw NotImplementedException("CC"); + throw NotImplementedException("I2F CC"); } const bool is_signed{i2f.is_signed != 0}; int src_bitsize{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp index d8d6c939e..5feefc0ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -33,9 +33,14 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> lo_bits_reg; BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; BitField<48, 2, u64> x_mode; BitField<50, 1, u64> wrap; } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } if (shf.x_mode != 0) { throw NotImplementedException("SHF X Mode"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 40f14ab8a..1badbacc4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -16,9 +16,14 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const imnmx{insn}; + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + if (imnmx.mode != 0) { throw NotImplementedException("IMNMX.MODE"); } diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index 4025b1358..be00bb605 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,12 +16,16 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { throw NotImplementedException("SHR.XMODE"); } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } IR::U32 base{v.X(shr.src_reg_a)}; if (shr.brev == 1) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp index 784588e83..4a0f04e47 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -14,6 +14,7 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; @@ -21,7 +22,10 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ throw NotImplementedException("LEA.HI X"); } if (lea.pred != IR::Pred::PT) { - throw NotImplementedException("LEA.LO Pred"); + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; @@ -44,6 +48,7 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { BitField<39, 5, u64> scale; BitField<45, 1, u64> neg; BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; if (lea.x != 0) { @@ -52,6 +57,9 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { if (lea.pred != IR::Pred::PT) { throw NotImplementedException("LEA.LO Pred"); } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; const s32 scale{static_cast(lea.scale)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp index 256c47504..e0fe47912 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -73,8 +73,13 @@ IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; } const lop3{insn}; + if (lop3.cc != 0) { + throw NotImplementedException("LOP3 CC"); + } + const IR::U32 op_a{v.X(lop3.src_reg)}; const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; v.X(lop3.dest_reg, result); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp index 6c15963fa..b02789874 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -21,6 +21,7 @@ void TranslatorVisitor::PSET(u64 insn) { BitField<42, 1, u64> neg_pred_c; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; } const pset{insn}; const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; @@ -31,11 +32,22 @@ void TranslatorVisitor::PSET(u64 insn) { const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; - const IR::U32 false_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + const IR::U32 result{ir.Select(res_2, true_result, zero)}; X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } } // namespace Shader::Maxwell From d404b871d595794184b8d80fc05682eb6e2792fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 16:46:26 -0300 Subject: [PATCH 203/770] shader: Mark ImageWrite with side effects --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ceb44e604..2df631791 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,9 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::BindlessImageWrite: + case Opcode::BoundImageWrite: + case Opcode::ImageWrite: return true; default: return false; From 2e71e4c5c02d133e3e85597b7eb52b88084a31fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:08:15 -0300 Subject: [PATCH 204/770] spirv: Fix forward declarations on phi nodes --- .../backend/spirv/emit_spirv.cpp | 72 +++++++------------ 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ecd0fac5c..63ed92a5d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -30,18 +30,7 @@ struct FuncTraits { template void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { - const Id forward_id{inst->Definition()}; - const bool has_forward_id{Sirit::ValidId(forward_id)}; - Id current_id{}; - if (has_forward_id) { - current_id = ctx.ExchangeCurrentId(forward_id); - } - const Id new_id{func(ctx, std::forward(args)...)}; - if (has_forward_id) { - ctx.ExchangeCurrentId(current_id); - } else { - inst->SetDefinition(new_id); - } + inst->SetDefinition(func(ctx, std::forward(args)...)); } template @@ -255,31 +244,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SampledBuffer); ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } - -Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { - // Phi nodes can have forward declarations, if an argument is not defined provide a forward - // declaration of it. 
Invoke will take care of giving it the right definition when it's - // actually defined. - const IR::Value arg{inst->Arg(index)}; - if (arg.IsImmediate()) { - // Let the context handle immediate definitions, as it already knows how - return ctx.Def(arg); - } - IR::Inst* const arg_inst{arg.InstRecursive()}; - if (const Id def{arg_inst->Definition()}; Sirit::ValidId(def)) { - // Return the current definition if it exists - return def; - } - if (arg_inst == inst) { - // This is a self referencing phi node - // Self-referencing definition will be set by the caller, so just grab the current id - return ctx.CurrentId(); - } - // If it hasn't been defined and it's not a self reference, get a forward declaration - const Id def{ctx.ForwardDeclarationId()}; - arg_inst->SetDefinition(def); - return def; -} } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { @@ -292,31 +256,45 @@ std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& bi SetupSignedNanCapabilities(profile, program, ctx, main); } SetupCapabilities(profile, program.info, ctx); + + auto inst{program.blocks.front()->begin()}; + size_t block_index{}; + ctx.PatchDeferredPhi([&](size_t phi_arg) { + if (phi_arg == 0) { + ++inst; + if (inst == program.blocks[block_index]->end() || + inst->GetOpcode() != IR::Opcode::Phi) { + do { + ++block_index; + inst = program.blocks[block_index]->begin(); + } while (inst->GetOpcode() != IR::Opcode::Phi); + } + } + return ctx.Def(inst->Arg(phi_arg)); + }); return ctx.Assemble(); } Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; - boost::container::small_vector operands; - operands.reserve(num_args * 2); + boost::container::small_vector blocks; + blocks.reserve(num_args); for (size_t index = 0; index < num_args; ++index) { - operands.push_back(PhiArgDef(ctx, inst, index)); - operands.push_back(inst->PhiBlock(index)->Definition()); + blocks.push_back(inst->PhiBlock(index)->Definition()); } // The type of a phi instruction is stored in its flags const Id result_type{TypeId(ctx, inst->Flags())}; - return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); + return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); } void EmitVoid(EmitContext&) {} Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { - if (const Id id = ctx.Def(value); Sirit::ValidId(id)) { - return id; + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); } - const Id def{ctx.ForwardDeclarationId()}; - value.InstRecursive()->SetDefinition(def); - return def; + return id; } void EmitGetZeroFromOp(EmitContext&) { From e5e79648cfa3ac9d30c00bccf4252cd0dc93bccc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Jul 2021 18:16:56 -0300 Subject: [PATCH 205/770] pipeline_helper: Add missing [[maybe_unused]] --- src/video_core/renderer_vulkan/pipeline_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index d2c3f11c1..a39459b2e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,7 +97,7 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.image_descriptors) { + for ([[maybe_unused]] const auto& desc : 
info.image_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); } } From 1030b612a36f6b44e3b25215039748b01cfb9b8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:12:56 -0300 Subject: [PATCH 206/770] vk_rasterizer: Request outside render pass execution context for compute --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2ba44330f..7df169c85 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -279,6 +279,7 @@ void RasterizerVulkan::DispatchCompute() { const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } From 9280cd649a9c4cd53b929643377547db598bf5f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:22:20 -0300 Subject: [PATCH 207/770] shader: Move LaneId to the warp emission file and fix AMD --- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ---- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index a39b16f1e..12b7993ae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -66,7 +66,6 @@ void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -Id EmitLaneId(EmitContext& ctx); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); Id EmitUndefU8(EmitContext& ctx); @@ -403,6 +402,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id derivates, Id offset, Id lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitLaneId(EmitContext& ctx); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index a60eca815..5dc150ce2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -274,10 +274,6 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } -Id EmitLaneId(EmitContext& ctx) { - return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); -} - Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index f6196653a..1c23ccc08 
100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -49,6 +49,14 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { } } // Anonymous namespace +Id EmitLaneId(EmitContext& ctx) { + const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return id; + } + return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Constant(ctx.U32[1], 31U)); +} + Id EmitVoteAll(EmitContext& ctx, Id pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { return ctx.OpSubgroupAllKHR(ctx.U1, pred); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 82c5b37ba..86ea02560 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -58,7 +58,6 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) -OPCODE(LaneId, U32, ) // Undefined OPCODE(UndefU1, U1, ) @@ -419,6 +418,7 @@ OPCODE(ImageRead, U32x4, U32, OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) // Warp operations +OPCODE(LaneId, U32, ) OPCODE(VoteAll, U1, U1, ) OPCODE(VoteAny, U1, U1, ) OPCODE(VoteEqual, U1, U1, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bc23b0211..8c63c9876 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -343,11 +343,11 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::LaneId: case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: - case IR::Opcode::LaneId: info.uses_subgroup_invocation_id = true; break; case IR::Opcode::GetCbufU8: From ab543f18213133b3076b81f30df386d5cb470e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:37:03 -0300 Subject: [PATCH 208/770] spirv: Guard against typeless image reads on unsupported devices --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 +++- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 7 +++++++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 6 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 63ed92a5d..db7b3f1b2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -238,11 +238,13 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::SampledBuffer); - ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index dd261fd47..17266ce77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -388,6 +388,10 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { const auto info{inst->Flags()}; + if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { + // LOG_WARNING(..., "Typeless image read not supported by host"); + return ctx.ConstantNull(ctx.U32[4]); + } return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], Image(ctx, index, info), coords, std::nullopt, std::span{}); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 8c63c9876..9ef8688c9 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -421,6 +421,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageRead: { + const auto flags{inst.Flags()}; + info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } case IR::Opcode::SubgroupEqMask: case IR::Opcode::SubgroupLtMask: case IR::Opcode::SubgroupLeMask: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e13cb948a..f0d68d516 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -36,6 +36,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; + bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 253b6eacf..3fbe99268 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -127,6 +127,7 @@ struct Info { bool uses_subgroup_vote{}; bool uses_subgroup_mask{}; bool uses_fswzadd{}; + bool uses_typeless_image_reads{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fcebb8f6e..25dbefd5c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -635,6 +635,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, From 106764a6d51566b050dfe7124f82cf9a4de468c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:46:51 -0300 Subject: [PATCH 209/770] spirv: Move phi node patching to a separate function --- .../backend/spirv/emit_spirv.cpp | 29 ++++++++++--------- 1 file changed, 16 
insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index db7b3f1b2..5a1ffd61c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -246,21 +246,10 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::SampledBuffer); } -} // Anonymous namespace - -std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { - EmitContext ctx{profile, program, binding}; - const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(program, ctx, main); - if (profile.support_float_controls) { - ctx.AddExtension("SPV_KHR_float_controls"); - SetupDenormControl(profile, program, ctx, main); - SetupSignedNanCapabilities(profile, program, ctx, main); - } - SetupCapabilities(profile, program.info, ctx); +void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { auto inst{program.blocks.front()->begin()}; - size_t block_index{}; + size_t block_index{0}; ctx.PatchDeferredPhi([&](size_t phi_arg) { if (phi_arg == 0) { ++inst; @@ -274,6 +263,20 @@ std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& bi } return ctx.Def(inst->Arg(phi_arg)); }); +} +} // Anonymous namespace + +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { + EmitContext ctx{profile, program, binding}; + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } + SetupCapabilities(profile, program.info, ctx); + PatchPhiNodes(program, ctx); return ctx.Assemble(); } From 479ca00071ccaab6ca9ac28daf375e1ed15dc447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:50:30 -0300 Subject: [PATCH 210/770] nsight_aftermath_tracker: Report used shaders to Nsight Aftermath --- .../renderer_vulkan/vk_compute_pass.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- .../nsight_aftermath_tracker.cpp | 5 ++--- .../vulkan_common/nsight_aftermath_tracker.h | 21 ++++++++++--------- .../vulkan_common/vulkan_device.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 3 ++- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2cfe9d4bd..ec9866605 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25dbefd5c..f699a9bdf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -770,6 +770,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; + 
device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], @@ -846,7 +847,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program, binding)}; + device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..209cb1e0a 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { +void NsightAftermathTracker::SaveShader(std::span spirv) const { if (!initialized) { return; } - - std::vector spirv_copy = spirv; + std::vector spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast(spirv_copy.size() * 4); diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON 
GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} +inline void NsightAftermathTracker::SaveShader(std::span) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c027598ba..78bb741bc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -493,7 +493,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector& spirv) const { +void Device::SaveShader(std::span spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ac2311e7e..adf62a707 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. std::string GetDriverName() const; From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: [PATCH 211/770] shader: Implement ATOM/S and RED --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 158 +++++- .../backend/spirv/emit_context.h | 20 + .../backend/spirv/emit_spirv.cpp | 3 + .../backend/spirv/emit_spirv.h | 95 ++++ .../backend/spirv/emit_spirv_atomic.cpp | 528 ++++++++++++++++++ .../frontend/ir/ir_emitter.cpp | 200 ++++++- .../frontend/ir/ir_emitter.h | 39 ++ .../frontend/ir/microinstruction.cpp | 66 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 70 +++ .../impl/atomic_operations_global_memory.cpp | 222 ++++++++ .../impl/atomic_operations_shared_memory.cpp | 110 ++++ .../translate/impl/not_implemented.cpp | 12 - .../ir_opt/collect_shader_info_pass.cpp | 70 +++ .../global_memory_to_storage_buffer_pass.cpp | 121 +++- .../ir_opt/lower_fp16_to_fp32.cpp | 12 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 13 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + .../vulkan_common/vulkan_device.cpp | 14 + src/video_core/vulkan_common/vulkan_device.h | 6 + 21 files changed, 1745 insertions(+), 19 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 8e1d37373..7b9f08aa0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h 
+ backend/spirv/emit_spirv_atomic.cpp backend/spirv/emit_spirv_barriers.cpp backend/spirv/emit_spirv_bitwise_conversion.cpp backend/spirv/emit_spirv_composite.cpp @@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp + frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp frontend/maxwell/translate/impl/barrier_operations.cpp frontend/maxwell/translate/impl/bitfield_extract.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32f8c4508..e5d83e9b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,53 @@ namespace Shader::Backend::SPIRV { namespace { +enum class CasFunctionType { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (function_type) { + case CasFunctionType::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case CasFunctionType::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case CasFunctionType::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case CasFunctionType::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case CasFunctionType::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; @@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) { } } +Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + const Id storage_type{pointer_type == CasPointerType::Shared ? 
shared_memory_u32_type + : storage_memory_u32}; + const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; + const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{OpFunctionParameter(U32[1])}; + const Id op_b{OpFunctionParameter(value_type)}; + const Id base{OpFunctionParameter(storage_type)}; + AddLabel(); + const Id one{Constant(U32[1], 1)}; + OpBranch(loop_header); + AddLabel(loop_header); + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{pointer_type == CasPointerType::Shared + ? OpAccessChain(shared_u32, base, index) + : OpAccessChain(storage_u32, base, u32_zero_value, index)}; + if (value_type.value == F32[2].value) { + const Id u32_value{OpLoad(U32[1], word_pointer)}; + const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; + const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; + const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, u32_new_value, u32_value)}; + const Id success{OpIEqual(U1, atomic_res, u32_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); + } else { + const Id value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitcast( + U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, new_value, value)}; + const Id success{OpIEqual(U1, atomic_res, value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpBitcast(value_type, atomic_res)); + } + OpFunctionEnd(); + return func; +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; - const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); - shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; @@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } + if (program.info.uses_shared_increment) { + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + } + if (program.info.uses_shared_decrement) { + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + } } void EmitContext::DefineAttributeMemAccess(const Info& info) { @@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - 
const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; + const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); @@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { index += desc.count; binding += desc.count; } + if (info.uses_global_increment) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_decrement) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_atomic_f32_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; + f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + } + if (info.uses_atomic_f16x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; + f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; + f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; + f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f32x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; + f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; + f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; + f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } } void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e70f3458c..34f38454f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -94,6 +94,7 @@ public: Id output_f32{}; Id storage_u32{}; + Id storage_memory_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -136,9 +137,21 @@ public: Id shared_memory_u32{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; 
+ Id shared_memory_u32_type{}; Id shared_store_u8_func{}; Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; Id input_position{}; std::array<Id, 32> input_generics{}; @@ -153,6 +166,11 @@ public: std::vector<Id> interfaces; private: + enum class CasPointerType { + Shared, + Ssbo, + }; + void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); @@ -171,6 +189,8 @@ private: void DefineInputs(const Info& info); void DefineOutputs(const Info& info); + + [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5a1ffd61c..9248bd78b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } + if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 12b7993ae..a3398a605 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext&
ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id 
EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..03d891419 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,528 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { + +Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.profile.support_explicit_workgroup_layout + ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; + return ctx.Constant(ctx.U32[1], imm_offset); + } + const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + // TODO: Support reinterpreting bindings, guaranteed to be aligned + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); +} + +std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; + return ctx.OpBitcast(ctx.U64, original_composite); +} + +void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { + const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id
EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + 
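// The two values GetAtomicArgs returns are the same for every atomic emitted in
+ // this file: Device scope and relaxed (None) memory semantics. That is a fixed
+ // choice of this backend; per-instruction memory orderings from the guest are
+ // not modeled here.
+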
const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, 
"Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not 
supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id 
ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 17be0c639..a3339f624 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); } +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst<U1>(Opcode::LogicalOr, a, b); } @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst } void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { + TextureInstInfo info) { const Opcode op{handle.IsImmediate() ?
Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; Inst(op, Flags{info}, handle, coords, color); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ec60070ef..f9cbf1304 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -228,6 +228,45 @@ public: [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp 
b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 2df631791..0f66c5627 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 86ea02560..dc776a73e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32, OPCODE(SGreaterThanEqual, U1, U32, U32, ) OPCODE(UGreaterThanEqual, U1, U32, U32, ) +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, 
U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
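+
+// How these instructions flow through the recompiler: the translator below decodes
+// ATOM/RED and emits the IR GlobalAtomic* ops above, keyed by 64-bit virtual
+// addresses; the global_memory_to_storage_buffer pass then rewrites them onto
+// (binding, offset) StorageAtomic* ops, which is why the SPIR-V backend implements
+// only the storage forms and leaves the global forms throwing NotImplementedException.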
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} +
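// Note on AtomOffset above: BitField<28, 20, s64> is what sign-extends the 20-bit
+// displacement before it is added to the base address. A standalone sketch of the
+// equivalent computation, assuming plain shifts instead of the BitField helper
+// (illustrative only, not part of the original file):
+// int64_t SignExtend20(uint64_t insn) {
+//     const uint64_t raw = (insn >> 28) & 0xFFFFF;  // extract bits [28, 48)
+//     return static_cast<int64_t>(raw << 44) >> 44; // arithmetic shift sign-extends
+// }
+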
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
+    switch (size) {
+    case AtomSize::U32:
+    case AtomSize::S32:
+    case AtomSize::F16x2:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomSize::U64:
+    case AtomSize::S64:
+        return v.L(dest_reg, IR::U64{result});
+    case AtomSize::F32:
+        return v.F(dest_reg, IR::F32{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOM(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<49, 3, AtomSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atom{insn};
+
+    const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
+    const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
+    const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    IR::Value result;
+
+    if (AtomOpNotApplicable(atom.size, atom.op)) {
+        result = LoadGlobal(ir, offset, atom.size);
+    } else if (!is_integer) {
+        if (atom.size == AtomSize::F32) {
+            result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
+            result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
+        }
+    } else if (size_64) {
+        result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
+    } else {
+        result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
+    }
+    StoreResult(*this, atom.dest_reg, result, atom.size);
+}
+
+void TranslatorVisitor::RED(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> src_reg_b;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 3, AtomSize> size;
+        BitField<23, 3, AtomOp> op;
+    } const red{insn};
+
+    if (AtomOpNotApplicable(red.size, red.op)) {
+        return;
+    }
+    const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
+    const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
+    const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
+    const IR::U64 offset{AtomOffset(*this, insn)};
+    if (!is_integer) {
+        if (red.size == AtomSize::F32) {
+            ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
+        } else {
+            const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
+            ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
+        }
+    } else if (size_64) {
+        ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
+    } else {
+        ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
+    }
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
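+
+// ATOMS performs atomic operations on shared memory. Unlike global ATOM,
+// offsets here are 32-bit and only the U32, S32, and U64 operand sizes exist.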
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class AtomOp : u64 {
+    ADD,
+    MIN,
+    MAX,
+    INC,
+    DEC,
+    AND,
+    OR,
+    XOR,
+    EXCH,
+};
+
+enum class AtomsSize : u64 {
+    U32,
+    S32,
+    U64,
+};
+
+IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
+                        bool is_signed) {
+    switch (op) {
+    case AtomOp::ADD:
+        return ir.SharedAtomicIAdd(offset, op_b);
+    case AtomOp::MIN:
+        return ir.SharedAtomicIMin(offset, op_b, is_signed);
+    case AtomOp::MAX:
+        return ir.SharedAtomicIMax(offset, op_b, is_signed);
+    case AtomOp::INC:
+        return ir.SharedAtomicInc(offset, op_b);
+    case AtomOp::DEC:
+        return ir.SharedAtomicDec(offset, op_b);
+    case AtomOp::AND:
+        return ir.SharedAtomicAnd(offset, op_b);
+    case AtomOp::OR:
+        return ir.SharedAtomicOr(offset, op_b);
+    case AtomOp::XOR:
+        return ir.SharedAtomicXor(offset, op_b);
+    case AtomOp::EXCH:
+        return ir.SharedAtomicExchange(offset, op_b);
+    default:
+        throw NotImplementedException("Integer Atoms Operation {}", op);
+    }
+}
+
+IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
+    union {
+        u64 raw;
+        BitField<8, 8, IR::Reg> offset_reg;
+        BitField<30, 22, u64> absolute_offset;
+        BitField<30, 22, s64> relative_offset;
+    } const encoding{insn};
+
+    if (encoding.offset_reg == IR::Reg::RZ) {
+        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
+    } else {
+        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
+        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
+    }
+}
+
+void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
+    switch (size) {
+    case AtomsSize::U32:
+    case AtomsSize::S32:
+        return v.X(dest_reg, IR::U32{result});
+    case AtomsSize::U64:
+        return v.L(dest_reg, IR::U64{result});
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::ATOMS(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> addr_reg;
+        BitField<20, 8, IR::Reg> src_reg_b;
+        BitField<28, 2, AtomsSize> size;
+        BitField<52, 4, AtomOp> op;
+    } const atoms{insn};
+
+    const bool size_64{atoms.size == AtomsSize::U64};
+    if (size_64 && atoms.op != AtomOp::EXCH) {
+        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
+    }
+    const bool is_signed{atoms.size == AtomsSize::S32};
+    const IR::U32 offset{AtomsOffset(*this, insn)};
+
+    IR::Value result;
+    if (size_64) {
+        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
+    } else {
+        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
+    }
+    StoreResult(*this, atoms.dest_reg, result, atoms.size);
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 327941223..aebe3072a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
     ThrowNotImplemented(Opcode::ATOM_cas);
 }
 
-void TranslatorVisitor::ATOM(u64) {
-    ThrowNotImplemented(Opcode::ATOM);
-}
-
 void TranslatorVisitor::ATOMS_cas(u64) {
     ThrowNotImplemented(Opcode::ATOMS_cas);
 }
 
-void TranslatorVisitor::ATOMS(u64) {
-    ThrowNotImplemented(Opcode::ATOMS);
-}
-
 void TranslatorVisitor::B2R(u64) {
     ThrowNotImplemented(Opcode::B2R);
 }
@@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
     ThrowNotImplemented(Opcode::RAM);
 }
 
-void TranslatorVisitor::RED(u64) {
-    ThrowNotImplemented(Opcode::RED);
-}
-
 void TranslatorVisitor::RET(u64) {
     ThrowNotImplemented(Opcode::RET);
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 9ef8688c9..73373576b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPOrdGreaterThanEqual16:
     case IR::Opcode::FPUnordGreaterThanEqual16:
     case IR::Opcode::FPIsNan16:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicMinF16x2:
+    case IR::Opcode::StorageAtomicMaxF16x2:
         info.uses_fp16 = true;
         break;
     case IR::Opcode::CompositeConstructF64x2:
@@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::ConvertF16U64:
    case IR::Opcode::ConvertF32U64:
     case IR::Opcode::ConvertF64U64:
+    case IR::Opcode::SharedAtomicExchange64:
         info.uses_int64 = true;
         break;
     default:
@@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FSwizzleAdd:
         info.uses_fswzadd = true;
         break;
+    case IR::Opcode::SharedAtomicInc32:
+        info.uses_shared_increment = true;
+        break;
+    case IR::Opcode::SharedAtomicDec32:
+        info.uses_shared_decrement = true;
+        break;
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::StorageAtomicInc32:
+        info.uses_global_increment = true;
+        break;
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::StorageAtomicDec32:
+        info.uses_global_decrement = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::StorageAtomicAddF32:
+        info.uses_atomic_f32_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::StorageAtomicAddF16x2:
+        info.uses_atomic_f16x2_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::StorageAtomicAddF32x2:
+        info.uses_atomic_f32x2_add = true;
+        break;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::StorageAtomicMinF16x2:
+        info.uses_atomic_f16x2_min = true;
+        break;
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::StorageAtomicMinF32x2:
+        info.uses_atomic_f32x2_min = true;
+        break;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::StorageAtomicMaxF16x2:
+        info.uses_atomic_f16x2_max = true;
+        break;
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+    case IR::Opcode::StorageAtomicMaxF32x2:
+        info.uses_atomic_f32x2_max = true;
+        break;
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::StorageAtomicIAdd64:
+    case IR::Opcode::StorageAtomicSMin64:
+    case IR::Opcode::StorageAtomicUMin64:
+    case IR::Opcode::StorageAtomicSMax64:
+    case IR::Opcode::StorageAtomicUMax64:
+    case IR::Opcode::StorageAtomicAnd64:
+    case IR::Opcode::StorageAtomicOr64:
+    case IR::Opcode::StorageAtomicXor64:
+        info.uses_64_bit_atomics = true;
+        break;
+    case IR::Opcode::SharedAtomicExchange64:
+        info.uses_64_bit_atomics = true;
+        info.uses_shared_memory_u32x2 = true;
+        break;
     default:
         break;
     }
 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index afe871505..0d4f266c3 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
     case IR::Opcode::WriteGlobal32:
     case IR::Opcode::WriteGlobal64:
     case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::GlobalAtomicMaxF32x2:
         return true;
     default:
         return false;
@@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
         return IR::Opcode::WriteStorage64;
     case IR::Opcode::WriteGlobal128:
         return IR::Opcode::WriteStorage128;
+    case IR::Opcode::GlobalAtomicIAdd32:
+        return IR::Opcode::StorageAtomicIAdd32;
+    case IR::Opcode::GlobalAtomicSMin32:
+        return IR::Opcode::StorageAtomicSMin32;
+    case IR::Opcode::GlobalAtomicUMin32:
+        return IR::Opcode::StorageAtomicUMin32;
+    case IR::Opcode::GlobalAtomicSMax32:
+        return IR::Opcode::StorageAtomicSMax32;
+    case IR::Opcode::GlobalAtomicUMax32:
+        return IR::Opcode::StorageAtomicUMax32;
+    case IR::Opcode::GlobalAtomicInc32:
+        return IR::Opcode::StorageAtomicInc32;
+    case IR::Opcode::GlobalAtomicDec32:
+        return IR::Opcode::StorageAtomicDec32;
+    case IR::Opcode::GlobalAtomicAnd32:
+        return IR::Opcode::StorageAtomicAnd32;
+    case IR::Opcode::GlobalAtomicOr32:
+        return IR::Opcode::StorageAtomicOr32;
+    case IR::Opcode::GlobalAtomicXor32:
+        return IR::Opcode::StorageAtomicXor32;
+    case IR::Opcode::GlobalAtomicIAdd64:
+        return IR::Opcode::StorageAtomicIAdd64;
+    case IR::Opcode::GlobalAtomicSMin64:
+        return IR::Opcode::StorageAtomicSMin64;
+    case IR::Opcode::GlobalAtomicUMin64:
+        return IR::Opcode::StorageAtomicUMin64;
+    case IR::Opcode::GlobalAtomicSMax64:
+        return IR::Opcode::StorageAtomicSMax64;
+    case IR::Opcode::GlobalAtomicUMax64:
+        return IR::Opcode::StorageAtomicUMax64;
+    case IR::Opcode::GlobalAtomicAnd64:
+        return IR::Opcode::StorageAtomicAnd64;
+    case IR::Opcode::GlobalAtomicOr64:
+        return IR::Opcode::StorageAtomicOr64;
+    case IR::Opcode::GlobalAtomicXor64:
+        return IR::Opcode::StorageAtomicXor64;
+    case IR::Opcode::GlobalAtomicExchange32:
+        return IR::Opcode::StorageAtomicExchange32;
+    case IR::Opcode::GlobalAtomicExchange64:
+        return IR::Opcode::StorageAtomicExchange64;
+    case IR::Opcode::GlobalAtomicAddF32:
+        return IR::Opcode::StorageAtomicAddF32;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+        return IR::Opcode::StorageAtomicAddF16x2;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+        return IR::Opcode::StorageAtomicMinF16x2;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+        return IR::Opcode::StorageAtomicMaxF16x2;
+    case IR::Opcode::GlobalAtomicAddF32x2:
+        return IR::Opcode::StorageAtomicAddF32x2;
+    case IR::Opcode::GlobalAtomicMinF32x2:
+        return IR::Opcode::StorageAtomicMinF32x2;
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+        return IR::Opcode::StorageAtomicMaxF32x2;
     default:
         throw InvalidArgument("Invalid global memory opcode {}", opcode);
     }
@@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
     inst.Invalidate();
 }
 
+/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
+void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
+                   const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
+    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
+    const IR::Value value{
+        &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
+    inst.ReplaceUsesWith(value);
+}
+
 /// Replace a global memory instruction with its storage buffer equivalent
 void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
              const IR::U32& offset) {
@@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     case IR::Opcode::WriteGlobal64:
     case IR::Opcode::WriteGlobal128:
         return ReplaceWrite(block, inst, storage_index, offset);
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::GlobalAtomicMaxF32x2:
+        return ReplaceAtomic(block, inst, storage_index, offset);
     default:
         throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
     }
@@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
             CollectStorageBuffers(*block, inst, info);
         }
     }
-    u32 storage_index{};
     for (const StorageBufferAddr& storage_buffer : info.set) {
         program.info.storage_buffers_descriptors.push_back({
             .cbuf_index = storage_buffer.index,
@@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
             .count = 1,
             .is_written{info.writes.contains(storage_buffer)},
         });
-        ++storage_index;
     }
     for (const StorageInst& storage_inst : info.to_replace) {
         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 52576b07f..62e73d52d 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
         return IR::Opcode::ConvertF32U32;
     case IR::Opcode::ConvertF16U64:
         return IR::Opcode::ConvertF32U64;
+    case IR::Opcode::GlobalAtomicAddF16x2:
+        return IR::Opcode::GlobalAtomicAddF32x2;
+    case IR::Opcode::StorageAtomicAddF16x2:
+        return IR::Opcode::StorageAtomicAddF32x2;
+    case IR::Opcode::GlobalAtomicMinF16x2:
+        return IR::Opcode::GlobalAtomicMinF32x2;
+    case IR::Opcode::StorageAtomicMinF16x2:
+        return IR::Opcode::StorageAtomicMinF32x2;
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+        return IR::Opcode::GlobalAtomicMaxF32x2;
+    case IR::Opcode::StorageAtomicMaxF16x2:
+        return IR::Opcode::StorageAtomicMaxF32x2;
     default:
         return op;
     }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0d68d516..a4e41bda1 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -38,6 +38,7 @@ struct Profile {
     bool support_viewport_index_layer_non_geometry{};
     bool support_typeless_image_loads{};
     bool warp_size_potentially_larger_than_guest{};
+    bool support_int64_atomics{};
 
     // FClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 3fbe99268..7bcecf554 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -128,6 +128,19 @@ struct Info {
     bool uses_subgroup_mask{};
     bool uses_fswzadd{};
     bool uses_typeless_image_reads{};
+    bool uses_shared_increment{};
+    bool uses_shared_decrement{};
+    bool uses_global_increment{};
+    bool uses_global_decrement{};
+    bool uses_atomic_f32_add{};
+    bool uses_atomic_f16x2_add{};
+    bool uses_atomic_f16x2_min{};
+    bool uses_atomic_f16x2_max{};
+    bool uses_atomic_f32x2_add{};
+    bool uses_atomic_f32x2_min{};
+    bool uses_atomic_f32x2_max{};
+    bool uses_64_bit_atomics{};
+    bool uses_shared_memory_u32x2{};
 
     IR::Type used_constant_buffer_types{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f699a9bdf..b953d694b 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
             device.IsExtShaderViewportIndexLayerSupported(),
         .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
+        .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
         .generic_input_types{},
         .fixed_state_point_size{},
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 78bb741bc..911dfed44 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
     bool has_ext_extended_dynamic_state{};
+    bool has_ext_shader_atomic_int64{};
     for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
         const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
                               bool push) {
@@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
         test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
+        test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
         test(has_khr_workgroup_memory_explicit_layout,
              VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
@@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
     } else {
         is_warp_potentially_bigger = true;
     }
+    if (has_ext_shader_atomic_int64) {
+        VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
+        atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR;
+        atomic_int64.pNext = nullptr;
+        features.pNext = &atomic_int64;
+        physical.GetFeatures2KHR(features);
+
+        if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
+            extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
+            ext_shader_atomic_int64 = true;
+        }
+    }
     if (has_ext_transform_feedback) {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
         tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index adf62a707..4e6d13308 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -229,6 +229,11 @@ public:
         return ext_shader_stencil_export;
     }
 
+    /// Returns true if the device supports VK_KHR_shader_atomic_int64.
+    bool IsExtShaderAtomicInt64Supported() const {
+        return ext_shader_atomic_int64;
+    }
+
     /// Returns true when a known debugging tool is attached.
     bool HasDebuggingToolAttached() const {
         return has_renderdoc || has_nsight_graphics;
@@ -320,6 +325,7 @@ private:
     bool ext_custom_border_color{};      ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};   ///< Support for VK_EXT_extended_dynamic_state.
     bool ext_shader_stencil_export{};    ///< Support for VK_EXT_shader_stencil_export.
+    bool ext_shader_atomic_int64{};      ///< Support for VK_KHR_shader_atomic_int64.
     bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
     bool has_renderdoc{};                ///< Has RenderDoc attached
    bool has_nsight_graphics{};          ///< Has Nsight Graphics attached

From c9337a4ae45639c0d5b6c83c30d098878f3c344a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 04:14:55 -0300
Subject: [PATCH 212/770] shader: Apply sign bit in FCMP (imm)

---
 .../frontend/maxwell/translate/impl/floating_point_compare.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
index 80109ca0e..7127ebf54 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -49,7 +49,7 @@ void TranslatorVisitor::FCMP_imm(u64 insn) {
     const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
     const u32 value{static_cast<u32>(fcmp.value) << 12};
 
-    FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn));
+    FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
 }
 
 } // namespace Shader::Maxwell

From 5c61e860e4f83524ffce10ca447398e83de81640 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 19:16:12 -0300
Subject: [PATCH 213/770] shader: Implement SR_THREAD_KILL

---
 src/shader_recompiler/backend/spirv/emit_context.cpp | 3 +++
 src/shader_recompiler/backend/spirv/emit_context.h | 1 +
 src/shader_recompiler/backend/spirv/emit_spirv.h | 1 +
 .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++
 src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++
 src/shader_recompiler/frontend/ir/opcodes.inc | 1 +
 .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++
 src/shader_recompiler/shader_info.h | 1 +
 10 files changed, 22 insertions(+)

diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index e5d83e9b4..bf2210899 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -790,6 +790,9 @@ void EmitContext::DefineInputs(const Info& info) {
     if (info.uses_local_invocation_id) {
         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
     }
+    if (info.uses_is_helper_invocation) {
+        is_helper_invocation = DefineInput(*this, U1, spv::BuiltIn::HelperInvocation);
+    }
     if (info.uses_subgroup_mask) {
         subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR);
         subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR);
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index 34f38454f..98a9140bf 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -107,6 +107,7 @@ public:
 
     Id workgroup_id{};
     Id local_invocation_id{};
+    Id is_helper_invocation{};
     Id subgroup_local_invocation_id{};
     Id subgroup_mask_eq{};
     Id subgroup_mask_lt{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index a3398a605..04340fa70 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -65,6 +65,7 @@ void EmitSetCFlag(EmitContext& ctx);
 void EmitSetOFlag(EmitContext& ctx);
 Id EmitWorkgroupId(EmitContext& ctx);
 Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitIsHelperInvocation(EmitContext& ctx);
 Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
 void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
 Id EmitUndefU1(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 5dc150ce2..d552a1b52 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -274,6 +274,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) {
     return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id);
 }
 
+Id EmitIsHelperInvocation(EmitContext& ctx) {
+    return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation);
+}
+
 Id EmitLoadLocal(EmitContext& ctx, Id word_offset) {
     const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)};
     return ctx.OpLoad(ctx.U32[1], pointer);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index a3339f624..54a273a92 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -347,6 +347,10 @@ U32 IREmitter::LocalInvocationIdZ() {
     return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
 }
 
+U1 IREmitter::IsHelperInvocation() {
+    return Inst<U1>(Opcode::IsHelperInvocation);
+}
+
 U32 IREmitter::LaneId() {
     return Inst<U32>(Opcode::LaneId);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index f9cbf1304..d04224707 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -90,6 +90,8 @@ public:
     [[nodiscard]] U32 LocalInvocationIdY();
     [[nodiscard]] U32 LocalInvocationIdZ();
 
+    [[nodiscard]] U1 IsHelperInvocation();
+
     [[nodiscard]] U32 LaneId();
 
     [[nodiscard]] U32 LoadGlobalU8(const U64& address);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index dc776a73e..f70008682 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -58,6 +58,7 @@ OPCODE(SetCFlag, Void, U1, )
 OPCODE(SetOFlag, Void, U1, )
 OPCODE(WorkgroupId, U32x3, )
 OPCODE(LocalInvocationId, U32x3, )
+OPCODE(IsHelperInvocation, U1, )
 
 // Undefined
 OPCODE(UndefU1, U1, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index be1f21e7b..50650cc56 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -113,6 +113,8 @@ enum class SpecialRegister : u64 {
 [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
     switch (special_register) {
+    case SpecialRegister::SR_THREAD_KILL:
+        return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
     case SpecialRegister::SR_TID_X:
         return ir.LocalInvocationIdX();
     case SpecialRegister::SR_TID_Y:
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 73373576b..c80d2d29c 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -348,6 +348,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::LocalInvocationId:
         info.uses_local_invocation_id = true;
         break;
+    case IR::Opcode::IsHelperInvocation:
+        info.uses_is_helper_invocation = true;
+        break;
     case IR::Opcode::LaneId:
     case IR::Opcode::ShuffleIndex:
     case IR::Opcode::ShuffleUp:
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 7bcecf554..aa204ae37 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -92,6 +92,7 @@ struct Info {
     bool uses_workgroup_id{};
     bool uses_local_invocation_id{};
+    bool uses_is_helper_invocation{};
     bool uses_subgroup_invocation_id{};
 
     std::array<InputVarying, 32> input_generics{};

From 2ed80f6b1e85823d7a13dfbb119545a0a0ec7427 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 19:16:47 -0300
Subject: [PATCH 214/770] shader: Implement LOP CC

---
 .../backend/spirv/emit_spirv.h | 6 +++---
 .../backend/spirv/emit_spirv_integer.cpp | 21 +++++++++++++------
 .../translate/impl/logic_operation.cpp | 14 ++++++++++---
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 04340fa70..150477ff6 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -280,9 +280,9 @@ Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
-Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b);
-Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b);
-Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
 Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
 Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 8bf43b91d..944f1e429 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -111,16 +111,25 @@ Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) {
     return ctx.OpShiftRightArithmetic(ctx.U64, base, shift);
 }
 
-Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpBitwiseAnd(ctx.U32[1], a, b);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)};
+    SetZeroFlag(ctx, inst, result);
+    SetSignFlag(ctx, inst, result);
+    return result;
 }
 
-Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpBitwiseOr(ctx.U32[1], a, b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
+    SetZeroFlag(ctx, inst, result);
+    SetSignFlag(ctx, inst, result);
+    return result;
 }
 
-Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) {
-    return ctx.OpBitwiseXor(ctx.U32[1], a, b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)};
+    SetZeroFlag(ctx, inst, result);
+    SetSignFlag(ctx, inst, result);
+    return result;
 }
 
 Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
index 89e5cd6de..92cd27ed4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -44,9 +44,6 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv
     if (x) {
         throw NotImplementedException("X");
     }
-    if (cc) {
-        throw NotImplementedException("CC");
-    }
     IR::U32 op_a{v.X(lop.src_reg)};
     if (inv_a != 0) {
         op_a = v.ir.BitwiseNot(op_a);
@@ -60,6 +57,17 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv
         const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
         v.ir.SetPred(dest_pred, pred_result);
     }
+    if (cc) {
+        if (bit_op == LogicalOp::PASS_B) {
+            v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
+            v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
+        } else {
+            v.SetZFlag(v.ir.GetZeroFromOp(result));
+            v.SetSFlag(v.ir.GetSignFromOp(result));
+        }
+        v.ResetCFlag();
+        v.ResetOFlag();
+    }
     v.X(lop.dest_reg, result);
 }

From dfd5341d7117e4299b6c34e8b1feb0e66c230478 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 20:40:00 -0300
Subject: [PATCH 215/770] shader: Mark blocks with no end branch as unreachable

---
 .../frontend/maxwell/structured_control_flow.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 02cef2645..e63e25aa6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -816,8 +816,13 @@ private:
                 throw NotImplementedException("Statement type {}", stmt.type);
             }
         }
-        if (current_block && continue_block) {
-            IR::IREmitter{*current_block}.Branch(continue_block);
+        if (current_block) {
+            IR::IREmitter ir{*current_block};
+            if (continue_block) {
+                ir.Branch(continue_block);
+            } else {
+                ir.Unreachable();
+            }
+        }
     }
 }

From 415c7e46ed2f00bb4611cf2913eac1b92ca130bc Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 20:54:51 -0300
Subject: [PATCH 216/770] shader: Simplify FLO and throw on CC

---
 .../translate/impl/find_leading_one.cpp | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
index d5361bec5..f0cb25d61 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -8,26 +8,27 @@
 namespace Shader::Maxwell {
 namespace {
-void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
+void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
     union {
         u64 insn;
         BitField<0, 8, IR::Reg> dest_reg;
         BitField<40, 1, u64> tilde;
         BitField<41, 1, u64> shift;
+        BitField<47, 1, u64> cc;
         BitField<48, 1, u64> is_signed;
     } const flo{insn};
 
-    const bool invert{flo.tilde != 0};
-    const bool is_signed{flo.is_signed != 0};
-    const bool shift_op{flo.shift != 0};
-
-    const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src};
-    const IR::U32 find_result{is_signed ? v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)};
-    const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))};
-    const IR::U32 offset{v.ir.Imm32(31)};
-    const IR::U32 success_result{shift_op ? IR::U32{v.ir.ISub(offset, find_result)} : find_result};
-
-    const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)};
+    if (flo.cc != 0) {
+        throw NotImplementedException("CC");
+    }
+    if (flo.tilde != 0) {
+        src = v.ir.BitwiseNot(src);
+    }
+    IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
+    if (flo.shift != 0) {
+        const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
+        result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
+    }
     v.X(flo.dest_reg, result);
 }
 } // Anonymous namespace

From a33014022ed86c27ba4faa243fa6d0a69df75564 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 20:57:37 -0300
Subject: [PATCH 217/770] pipeline_helper: Simplify descriptor objects initialization

---
 .../renderer_vulkan/pipeline_helper.h | 58 ++++++++-----------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index a39459b2e..e2167dc4b 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -85,42 +85,34 @@ public:
     }
 
     void Add(const Shader::Info& info, VkShaderStageFlags stage) {
-        for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
-            Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage);
-        }
-        for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
-            Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage);
-        }
-        for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) {
-            Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage);
-        }
-        for ([[maybe_unused]] const auto& desc : info.texture_descriptors) {
-            Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage);
-        }
-        for ([[maybe_unused]] const auto& desc : info.image_descriptors) {
-            Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage);
-        }
+        Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size());
+        Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size());
+        Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size());
+        Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size());
+        Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size());
     }
 
 private:
-    void Add(VkDescriptorType type, VkShaderStageFlags stage) {
-        bindings.push_back({
-            .binding = binding,
-            .descriptorType = type,
-            .descriptorCount = 1,
-            .stageFlags = stage,
-            .pImmutableSamplers = nullptr,
-        });
-        entries.push_back(VkDescriptorUpdateTemplateEntryKHR{
-            .dstBinding = binding,
-            .dstArrayElement = 0,
-            .descriptorCount = 1,
-            .descriptorType = type,
-            .offset = offset,
-            .stride = sizeof(DescriptorUpdateEntry),
-        });
-        ++binding;
-        offset += sizeof(DescriptorUpdateEntry);
+    void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) {
+        for (size_t i = 0; i < num; ++i) {
+            bindings.push_back({
+                .binding = binding,
+                .descriptorType = type,
+                .descriptorCount = 1,
+                .stageFlags = stage,
+                .pImmutableSamplers = nullptr,
+            });
+            entries.push_back({
+                .dstBinding = binding,
+                .dstArrayElement = 0,
+                .descriptorCount = 1,
+                .descriptorType = type,
+                .offset = offset,
+                .stride = sizeof(DescriptorUpdateEntry),
+            });
+            ++binding;
+            offset += sizeof(DescriptorUpdateEntry);
+        }
     }
 
     const vk::Device* device{};

From 23b87147321d02abf47868f231f00f29b0d3b87d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 21:02:44 -0300
Subject: [PATCH 218/770] spirv: Define StorageImageWriteWithoutFormat capability when used

---
 src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 +++
 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 5 +++++
 src/shader_recompiler/shader_info.h | 1 +
 3 files changed, 9 insertions(+)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 9248bd78b..3258b0cf8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -244,6 +244,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
     if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
         ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
     }
+    if (info.uses_typeless_image_writes) {
+        ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
+    }
     // TODO: Track this usage
     ctx.AddCapability(spv::Capability::ImageGatherExtended);
     ctx.AddCapability(spv::Capability::ImageQuery);
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index c80d2d29c..ab529e86d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -436,6 +436,11 @@ void VisitUsages(Info& info, IR::Inst& inst) {
             inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
         break;
     }
+    case IR::Opcode::ImageWrite: {
+        const auto flags{inst.Flags<IR::TextureInstInfo>()};
+        info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
+        break;
+    }
     case IR::Opcode::SubgroupEqMask:
     case IR::Opcode::SubgroupLtMask:
     case IR::Opcode::SubgroupLeMask:
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index aa204ae37..6a51aabb5 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -129,6 +129,7 @@ struct Info {
     bool uses_subgroup_mask{};
     bool uses_fswzadd{};
     bool uses_typeless_image_reads{};
+    bool uses_typeless_image_writes{};
     bool uses_shared_increment{};
     bool uses_shared_decrement{};
     bool uses_global_increment{};

From 2516829e4cfa30378ce049a8c66dee9b3482d673 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 21:15:44 -0300
Subject: [PATCH 219/770] shader: Fix CC in I2I

---
 .../maxwell/translate/impl/integer_to_integer_conversion.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
index 2f1a58805..53e8d8923 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -105,6 +105,8 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
     if (i2i.cc != 0) {
         v.SetZFlag(v.ir.GetZeroFromOp(result));
         v.SetSFlag(v.ir.GetSignFromOp(result));
+        v.ResetCFlag();
+        v.ResetOFlag();
     }
 }
 } // Anonymous namespace

From f71208414775a6fca87130d2defcdeba75314084 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 21:33:18 -0300
Subject: [PATCH 220/770] shader: Stub ISBERD

---
 src/shader_recompiler/CMakeLists.txt | 1 +
 .../impl/internal_stage_buffer_entry_read.cpp | 55 +++++++++++++++++++
 .../translate/impl/not_implemented.cpp | 4 ----
 3 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 7b9f08aa0..8c24c1377 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -120,6 +120,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/integer_shift_right.cpp
     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
     frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
+    frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
     frontend/maxwell/translate/impl/load_constant.cpp
     frontend/maxwell/translate/impl/load_constant.h
     frontend/maxwell/translate/impl/load_effective_address.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..8c01b7d30
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,55 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+#pragma optimize("", off)
+
+namespace Shader::Maxwell {
+namespace {
+enum class Mode : u64 {
+    Default,
+    Patch,
+    Prim,
+    Attr,
+};
+
+enum class Shift : u64 {
+    Default,
+    U16,
+    B32,
+};
+
+} // Anonymous namespace
+
+void TranslatorVisitor::ISBERD(u64 insn) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_reg;
+        BitField<31, 1, u64> skew;
+        BitField<32, 1, u64> o;
+        BitField<33, 2, Mode> mode;
+        BitField<47, 2, Shift> shift;
+    } const isberd{insn};
+
+    if (isberd.skew != 0) {
+        throw NotImplementedException("SKEW");
+    }
+    if (isberd.o != 0) {
+        throw NotImplementedException("O");
+    }
+    if (isberd.mode != Mode::Default) {
+        throw NotImplementedException("Mode {}", isberd.mode.Value());
+    }
+    if (isberd.shift != Shift::Default) {
+        throw NotImplementedException("Shift {}", isberd.shift.Value());
+    }
+    // LOG_WARNING(..., "ISBERD is stubbed");
+    X(isberd.dest_reg, X(isberd.src_reg));
+}
+
+} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index aebe3072a..694bdfccb 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -141,10 +141,6 @@ void TranslatorVisitor::IMUL32I(u64) {
     ThrowNotImplemented(Opcode::IMUL32I);
 }
 
-void TranslatorVisitor::ISBERD(u64) {
-    ThrowNotImplemented(Opcode::ISBERD);
-}
-
 void TranslatorVisitor::JCAL(u64) {
     ThrowNotImplemented(Opcode::JCAL);
 }

From 4b0172f6debf9ba595d5fd2d3e2329328513f5db Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 11 Apr 2021 21:33:41 -0300
Subject: [PATCH 221/770] shader: Stub SR_INVOCATION_INFO

---
 .../maxwell/translate/impl/move_special_register.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index 50650cc56..bc822d585 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -115,6 +115,9 @@ enum class SpecialRegister : u64 {
     switch (special_register) {
     case SpecialRegister::SR_THREAD_KILL:
         return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
+    case SpecialRegister::SR_INVOCATION_INFO:
+        // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
+        return ir.Imm32(0x00ff'0000);
     case SpecialRegister::SR_TID_X:
         return ir.LocalInvocationIdX();
     case SpecialRegister::SR_TID_Y:
@@ -128,10 +131,10 @@ enum class SpecialRegister : u64 {
     case SpecialRegister::SR_CTAID_Z:
         return ir.WorkgroupIdZ();
     case SpecialRegister::SR_WSCALEFACTOR_XY:
-        // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY (Stubbed)");
+        // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed");
         return ir.Imm32(Common::BitCast<u32>(1.0f));
     case SpecialRegister::SR_WSCALEFACTOR_Z:
-        // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)");
+        // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed");
         return ir.Imm32(Common::BitCast<u32>(1.0f));
     case SpecialRegister::SR_LANEID:
         return ir.LaneId();

From dd3432d357ce0bdf8bb295094c89bf659c939259 Mon Sep 17 00:00:00 2001
From: lat9nq <22451773+lat9nq@users.noreply.github.com>
Date: Sun, 11 Apr 2021 23:11:24 -0400
Subject: [PATCH 222/770] internal_stage_buffer_entry_read: Remove pragma optimize off

---
 .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
index 8c01b7d30..edd6220a8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -6,8 +6,6 @@
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
 
-#pragma optimize("", off)
-
 namespace Shader::Maxwell {
 namespace {
 enum class Mode : u64 {

From a6cef71cc0b03f929f1bc97152b302562f46bc53 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 12 Apr 2021 03:48:15 -0300
Subject: [PATCH 223/770] shader: Implement OUT

---
 src/shader_recompiler/CMakeLists.txt | 1 +
 .../backend/spirv/emit_spirv.h | 2 +
 .../backend/spirv/emit_spirv_special.cpp | 8 ++++
 .../frontend/ir/ir_emitter.cpp | 8 ++++
 .../frontend/ir/ir_emitter.h | 3 ++
 .../frontend/ir/microinstruction.cpp | 2 +
 src/shader_recompiler/frontend/ir/opcodes.inc | 2 +
 .../translate/impl/load_store_attribute.cpp | 7 +--
 .../translate/impl/not_implemented.cpp | 12 -----
 .../translate/impl/output_geometry.cpp | 45 +++++++++++++++++++
 10 files changed, 73 insertions(+), 17 deletions(-)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 8c24c1377..bbbfa98a3 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -134,6 +134,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/move_register_to_predicate.cpp
     frontend/maxwell/translate/impl/move_special_register.cpp
     frontend/maxwell/translate/impl/not_implemented.cpp
+    frontend/maxwell/translate/impl/output_geometry.cpp
     frontend/maxwell/translate/impl/predicate_set_predicate.cpp
     frontend/maxwell/translate/impl/predicate_set_register.cpp
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 150477ff6..440075212 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -34,6 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx);
 void EmitMemoryBarrierSystemLevel(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, Id stream);
+void EmitEndPrimitive(EmitContext& ctx, Id stream);
 void EmitGetRegister(EmitContext& ctx);
 void EmitSetRegister(EmitContext& ctx);
 void EmitGetPred(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
index 5f80c189f..d20f4def3 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -36,4 +36,12 @@ void EmitEpilogue(EmitContext& ctx) {
     }
 }
 
+void EmitEmitVertex(EmitContext& ctx, Id stream) {
+    ctx.OpEmitStreamVertex(stream);
+}
+
+void EmitEndPrimitive(EmitContext& ctx, Id stream) {
+    ctx.OpEndStreamPrimitive(stream);
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 54a273a92..7d48fa1ba 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -125,6 +125,14 @@ void IREmitter::Epilogue() {
     Inst(Opcode::Epilogue);
 }
 
+void IREmitter::EmitVertex(const U32& stream) {
+    Inst(Opcode::EmitVertex, stream);
+}
+
+void IREmitter::EndPrimitive(const U32& stream) {
+    Inst(Opcode::EndPrimitive, stream);
+}
+
 U32 IREmitter::GetReg(IR::Reg reg) {
     return Inst<U32>(Opcode::GetRegister, reg);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index d04224707..033c4332e 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -43,6 +43,9 @@ public:
     void Prologue();
     void Epilogue();
 
+    void EmitVertex(const U32& stream);
+    void EndPrimitive(const U32& stream);
+
     [[nodiscard]] U32 GetReg(IR::Reg reg);
     void SetReg(IR::Reg reg, const U32& value);
 
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 0f66c5627..204c55fa8 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -69,6 +69,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::MemoryBarrierSystemLevel:
     case Opcode::Prologue:
     case Opcode::Epilogue:
+    case Opcode::EmitVertex:
+    case Opcode::EndPrimitive:
     case Opcode::SetAttribute:
     case Opcode::SetAttributeIndexed:
     case Opcode::SetFragColor:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index f70008682..0e487f1a7 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -25,6 +25,8 @@ OPCODE(MemoryBarrierSystemLevel, Void, )
 // Special operations
 OPCODE(Prologue, Void, )
 OPCODE(Epilogue, Void, )
+OPCODE(EmitVertex, Void, U32, )
+OPCODE(EndPrimitive, Void, U32, )
 
 // Context getters/setters
 OPCODE(GetRegister, U32, Reg, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
index f629e7167..79293bd6b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) {
         BitField<8, 8, IR::Reg> index_reg;
         BitField<20, 10, u64> absolute_offset;
         BitField<20, 11, s64> relative_offset;
-        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<39, 8, IR::Reg> array_reg;
         BitField<32, 1, u64> o;
         BitField<31, 1, u64> patch;
         BitField<47, 2, Size> size;
@@ -100,16 +100,13 @@ void TranslatorVisitor::AST(u64 insn) {
         BitField<20, 10, u64> absolute_offset;
         BitField<20, 11, s64> relative_offset;
         BitField<31, 1, u64> patch;
-        BitField<39, 8, IR::Reg> stream_reg;
+        BitField<39, 8, IR::Reg> array_reg;
         BitField<47, 2, Size> size;
     } const ast{insn};
 
     if (ast.patch != 0) {
         throw NotImplementedException("P");
     }
-    if (ast.stream_reg != IR::Reg::RZ) {
-        throw NotImplementedException("Stream store");
-    }
     if (ast.index_reg != IR::Reg::RZ) {
         throw NotImplementedException("Indexed store");
     }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 694bdfccb..a45d1e4be 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -169,18 +169,6 @@ void TranslatorVisitor::NOP(u64) {
     // NOP is No-Op.
 }
 
-void TranslatorVisitor::OUT_reg(u64) {
-    ThrowNotImplemented(Opcode::OUT_reg);
-}
-
-void TranslatorVisitor::OUT_cbuf(u64) {
-    ThrowNotImplemented(Opcode::OUT_cbuf);
-}
-
-void TranslatorVisitor::OUT_imm(u64) {
-    ThrowNotImplemented(Opcode::OUT_imm);
-}
-
 void TranslatorVisitor::PBK() {
     // PBK is a no-op
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
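+
+// OUT writes vertices to a geometry output stream: the emit bit emits a
+// vertex on the selected stream and the cut bit ends the current primitive.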
+ +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: [PATCH 224/770] shader: Implement geometry shaders --- .../backend/spirv/emit_context.cpp | 43 +++++++-- .../backend/spirv/emit_spirv.cpp | 38 ++++++++ .../backend/spirv/emit_spirv.h | 12 +-- .../spirv/emit_spirv_context_get_set.cpp | 93 +++++++++++-------- .../backend/spirv/emit_spirv_special.cpp | 42 ++++++--- .../frontend/ir/ir_emitter.cpp | 20 ++-- .../frontend/ir/ir_emitter.h | 6 +- src/shader_recompiler/frontend/ir/opcodes.inc | 8 +- src/shader_recompiler/frontend/ir/program.h | 4 + .../frontend/maxwell/program.cpp | 13 ++- .../translate/impl/load_store_attribute.cpp | 16 ++-- src/shader_recompiler/profile.h | 10 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 56 +++++++++-- .../renderer_vulkan/vk_pipeline_cache.h | 7 +- 14 files changed, 277 insertions(+), 91 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index bf2210899..01b77a7d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -140,7 +140,27 @@ Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin return id; } +u32 NumVertices(InputTopology input_topology) { + switch (input_topology) { + case InputTopology::Points: + return 1; + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + } + throw InvalidArgument("Invalid input topology {}", input_topology); +} + Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + if (ctx.stage == Stage::Geometry) { + const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + } return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); } @@ -455,12 +475,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { void EmitContext::DefineAttributeMemAccess(const Info& info) { const auto make_load{[&] { + const bool is_array{stage == Stage::Geometry}; const Id end_block{OpLabel()}; const Id default_label{OpLabel()}; - const Id func_type_load{TypeFunction(F32[1], U32[1])}; + const Id func_type_load{is_array ? 
TypeFunction(F32[1], U32[1], U32[1]) + : TypeFunction(F32[1], U32[1])}; const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; const Id offset{OpFunctionParameter(U32[1])}; + const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; + AddLabel(); const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; @@ -472,7 +496,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); i++) { + for (u32 i = 0; i < info.input_generics.size(); ++i) { if (!info.input_generics[i].used) { continue; } @@ -486,7 +510,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { size_t label_index{0}; if (info.loads_position) { AddLabel(labels[label_index]); - const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))}; + const Id pointer{is_array + ? OpAccessChain(input_f32, input_position, vertex, masked_index) + : OpAccessChain(input_f32, input_position, masked_index)}; + const Id result{OpLoad(F32[1], pointer)}; OpReturnValue(result); ++label_index; } @@ -502,7 +529,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { continue; } const Id generic_id{input_generics.at(i)}; - const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)}; + const Id pointer{is_array + ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) + : OpAccessChain(type->pointer, generic_id, masked_index)}; const Id value{OpLoad(type->id, pointer)}; const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; OpReturnValue(result); @@ -910,13 +939,13 @@ void EmitContext::DefineOutputs(const Info& info) { } if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing PointSize in Fragment stage"); + throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); } if (info.stores_clip_distance) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing PointSize in Fragment stage"); + throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); @@ -924,7 +953,7 @@ void EmitContext::DefineOutputs(const Info& info) { if (info.stores_viewport_index && (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing ViewportIndex in Fragment stage"); + throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3258b0cf8..d7c5890ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -134,6 +134,44 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Shader::Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; + case Shader::Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + 
ctx.AddCapability(spv::Capability::Geometry); + ctx.AddCapability(spv::Capability::GeometryStreams); + switch (ctx.profile.input_topology) { + case InputTopology::Points: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); + break; + case InputTopology::Lines: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); + break; + case InputTopology::LinesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); + break; + case InputTopology::Triangles: + ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); + break; + case InputTopology::TrianglesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); + break; + } + switch (program.output_topology) { + case OutputTopology::PointList: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); + break; + case OutputTopology::LineStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); + break; + case OutputTopology::TriangleStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); + break; + } + if (program.info.stores_point_size) { + ctx.AddCapability(spv::Capability::GeometryPointSize); + } + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + break; case Shader::Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 440075212..c0e1b8833 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -34,8 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); void EmitMemoryBarrierSystemLevel(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, Id stream); -void EmitEndPrimitive(EmitContext& ctx, Id stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); @@ -51,10 +51,10 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp 
index d552a1b52..a91b4c212 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -29,6 +30,15 @@ std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +template <typename... Args> +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { + if (ctx.stage == Stage::Geometry) { + return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); + } else { + return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); + } +} + std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 element{static_cast<u32>(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; @@ -66,6 +76,31 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { throw NotImplementedException("Read attribute {}", attr); } } + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, + const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; + if (!offset.IsImmediate()) { + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; + const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); + } + if (offset.U32() % element_size != 0) { + throw NotImplementedException("Unaligned immediate constant buffer load"); + } + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} } // Anonymous namespace void EmitGetRegister(EmitContext&) { @@ -100,31 +135,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, - u32 element_size, const IR::Value& binding, const IR::Value& offset) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Constant buffer indexing"); - } - const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; - const Id uniform_type{ctx.uniform_types.*member_ptr}; - if (!offset.IsImmediate()) { - Id index{ctx.Def(offset)}; - if (element_size > 1) { - const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; - const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; - index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); - } - const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; - return ctx.OpLoad(result_type, access_chain); - } - if (offset.U32() % element_size != 0) { - throw NotImplementedException("Unaligned immediate constant buffer load"); - } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; - const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value,
imm_offset)}; - return ctx.OpLoad(result_type, access_chain); -} - Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; return ctx.OpUConvert(ctx.U32[1], load); @@ -157,7 +167,7 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast<u32>(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { @@ -168,7 +178,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { return ctx.Constant(ctx.F32[1], 0.0f); } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } @@ -177,8 +187,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); + return ctx.OpLoad( + ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpLoad(ctx.U32[1], ctx.instance_id); @@ -198,29 +208,32 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 0U))); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, + ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); default: throw NotImplementedException("Read attribute {}", attr); } } -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { const std::optional<Id> output{OutputAttrPointer(ctx, attr)}; - if (!output) { - return; + if (output) { + ctx.OpStore(*output, value); } - ctx.OpStore(*output, value); } -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) { - return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { + if (ctx.stage == Stage::Geometry) { + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); + } else { + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); + } } -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) { +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); } diff --git
a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d20f4def3..6c8fcd5a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -5,6 +5,17 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); +} +} // Anonymous namespace void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB) { @@ -25,23 +36,30 @@ } void EmitEpilogue(EmitContext& ctx) { - if (ctx.profile.convert_depth_mode) { - const Id type{ctx.F32[1]}; - const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; - const Id z{ctx.OpCompositeExtract(type, position, 2u)}; - const Id w{ctx.OpCompositeExtract(type, position, 3u)}; - const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; - const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; - ctx.OpStore(ctx.output_position, vector); + if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { + ConvertDepthMode(ctx); } } -void EmitEmitVertex(EmitContext& ctx, Id stream) { - ctx.OpEmitStreamVertex(stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + if (ctx.profile.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (!stream.IsImmediate()) { + // LOG_WARNING(..., "EmitVertex's stream is not constant"); + ctx.OpEmitStreamVertex(ctx.u32_zero_value); + return; + } + ctx.OpEmitStreamVertex(ctx.Def(stream)); } -void EmitEndPrimitive(EmitContext& ctx, Id stream) { - ctx.OpEndStreamPrimitive(stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (!stream.IsImmediate()) { + // LOG_WARNING(..., "EndPrimitive's stream is not constant"); + ctx.OpEndStreamPrimitive(ctx.u32_zero_value); + return; + } + ctx.OpEndStreamPrimitive(ctx.Def(stream)); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7d48fa1ba..d66eb17a6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) { } F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return Inst<F32>(Opcode::GetAttribute, attribute); + return GetAttribute(attribute, Imm32(0)); } -void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { - Inst(Opcode::SetAttribute, attribute, value); +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { + return Inst<F32>(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttribute, attribute, value, vertex); } F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { - return Inst<F32>(Opcode::GetAttributeIndexed, phys_address); + return GetAttributeIndexed(phys_address, Imm32(0)); } -void
IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { - Inst(Opcode::SetAttributeIndexed, phys_address, value); +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { + return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); +} + +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 033c4332e..e70359eb1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -77,10 +77,12 @@ public: [[nodiscard]] U1 GetFlowTestResult(FlowTest test); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - void SetAttribute(IR::Attribute attribute, const F32& value); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); - void SetAttributeIndexed(const U32& phys_address, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); + void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0e487f1a7..7a21fe746 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -44,10 +44,10 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU32x2, U32x2, U32, U32, ) -OPCODE(GetAttribute, F32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, F32, ) -OPCODE(GetAttributeIndexed, F32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetAttributeIndexed, F32, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 3a37b3ab9..51e1a8c77 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" #include "shader_recompiler/stage.h" @@ -21,6 +22,9 @@ struct Program { Info info; Stage stage{}; std::array<u32, 3> workgroup_size{}; + OutputTopology output_topology{}; + u32 output_vertices{}; + u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aaf2a74a7..ab67446c8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39,
8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ald.vertex_reg)}; const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); } return; } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<47, 2, Size> size; } const ast{insn}; @@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ast.vertex_reg)}; const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); } return; } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a4e41bda1..06f1f59bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,14 @@ enum class AttributeType : u8 { Disabled, }; +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + struct Profile { u32 supported_spirv{0x00010000}; @@ -46,6 +54,8 @@ struct Profile { std::array<AttributeType, 32> generic_input_types{}; bool convert_depth_mode{}; + InputTopology input_topology{}; + std::optional<float> fixed_state_point_size; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b953d694b..f49add208 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -769,7 +769,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program.stage)}; + const Shader::Profile profile{MakeProfile(key, program)}; const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA } Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - Shader::Stage stage) { + const Shader::IR::Program& program) { Shader::Profile profile{base_profile};
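// Annotation, not part of the patch: MakeProfile now receives the whole IR
// program rather than just the stage. When a geometry shader is bound (the
// key.unique_hashes[4] check below), fixed-state point size and GL-style
// depth conversion are applied in the geometry stage instead of the vertex
// stage, since the last pre-rasterization stage owns those outputs; the
// fixed-function primitive topology is also recorded in the profile so
// geometry-shader inputs can be declared with a matching array size.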
- if (stage == Shader::Stage::VertexB) { - profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = Common::BitCast<float>(key.state.point_size); + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast<float>(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + profile.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + profile.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + profile.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + profile.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + profile.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } return profile; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 343ea1554..8b6839966 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,6 +33,10 @@ namespace Core { class System; } +namespace Shader::IR { +struct Program; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -160,7 +164,8 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; From 2597cee85b74be40bfecf0dc9cda90263d6cce40 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:53 -0300 Subject: [PATCH 225/770] shader: Add constant propagation for *&^| binary operations --- .../ir_opt/constant_propagation_pass.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 61fbbe04c..ee73b5b60 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -422,6 +422,9
@@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldAdd(block, inst); case IR::Opcode::ISub32: return FoldISub32(inst); + case IR::Opcode::IMul32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); + return; case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: @@ -479,6 +482,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::INotEqual: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); return; + case IR::Opcode::BitwiseAnd32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); + return; + case IR::Opcode::BitwiseOr32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; }); + return; + case IR::Opcode::BitwiseXor32: + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; }); + return; case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { From c070991def83e9bbfd599b03a6d52eb2bd4131c9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 22:26:15 -0300 Subject: [PATCH 226/770] shader: Fix fixed pipeline point size on geometry shaders --- .../backend/spirv/emit_spirv_special.cpp | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 6c8fcd5a5..fee740c08 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -15,6 +15,13 @@ void ConvertDepthMode(EmitContext& ctx) { const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; ctx.OpStore(ctx.output_position, vector); } + +void SetFixedPipelinePointSize(EmitContext& ctx) { + if (ctx.profile.fixed_state_point_size) { + const float point_size{*ctx.profile.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); + } +} } // Anonymous namespace void EmitPrologue(EmitContext& ctx) { @@ -28,10 +35,9 @@ void EmitPrologue(EmitContext& ctx) { ctx.OpStore(generic_id, default_vector); } } - if (ctx.profile.fixed_state_point_size) { - const float point_size{*ctx.profile.fixed_state_point_size}; - ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); - } + } + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + SetFixedPipelinePointSize(ctx); } } @@ -45,21 +51,23 @@ void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { if (ctx.profile.convert_depth_mode) { ConvertDepthMode(ctx); } - if (!stream.IsImmediate()) { + if (stream.IsImmediate()) { + ctx.OpEmitStreamVertex(ctx.Def(stream)); + } else { // LOG_WARNING(..., "EmitVertex's stream is not constant"); ctx.OpEmitStreamVertex(ctx.u32_zero_value); - return; } - ctx.OpEmitStreamVertex(ctx.Def(stream)); + // Restore fixed pipeline point size after emitting the vertex + SetFixedPipelinePointSize(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { - if (!stream.IsImmediate()) { + if (stream.IsImmediate()) { + ctx.OpEndStreamPrimitive(ctx.Def(stream)); + } else { // LOG_WARNING(..., "EndPrimitive's stream is not constant"); ctx.OpEndStreamPrimitive(ctx.u32_zero_value); - } - ctx.OpEndStreamPrimitive(ctx.Def(stream)); } } // namespace Shader::Backend::SPIRV From fa75b9b0626c8e118e27207dd1e82e2f415fc0bc Mon Sep 17 00:00:00 2001 From:
ReinUsesLisp Date: Tue, 13 Apr 2021 05:32:21 -0300 Subject: [PATCH 227/770] spirv: Rework storage buffers and shader memory --- .../backend/spirv/emit_context.cpp | 440 ++++++++++-------- .../backend/spirv/emit_context.h | 49 +- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv.h | 20 +- .../backend/spirv/emit_spirv_atomic.cpp | 333 +++++-------- .../backend/spirv/emit_spirv_memory.cpp | 135 +++--- .../ir_opt/collect_shader_info_pass.cpp | 69 ++- src/shader_recompiler/shader_info.h | 4 +- .../vulkan_common/vulkan_device.cpp | 29 +- 9 files changed, 581 insertions(+), 500 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 01b77a7d1..df53e58a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,7 +15,7 @@ namespace Shader::Backend::SPIRV { namespace { -enum class CasFunctionType { +enum class Operation { Increment, Decrement, FPAdd, @@ -23,44 +23,11 @@ enum class CasFunctionType { FPMax, }; -Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { - const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; - const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id op_a{ctx.OpFunctionParameter(value_type)}; - const Id op_b{ctx.OpFunctionParameter(value_type)}; - ctx.AddLabel(); - Id result{}; - switch (function_type) { - case CasFunctionType::Increment: { - const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; - const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); - break; - } - case CasFunctionType::Decrement: { - const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; - const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; - const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; - const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, op_b, decr); - break; - } - case CasFunctionType::FPAdd: - result = ctx.OpFAdd(value_type, op_a, op_b); - break; - case CasFunctionType::FPMin: - result = ctx.OpFMin(value_type, op_a, op_b); - break; - case CasFunctionType::FPMax: - result = ctx.OpFMax(value_type, op_a, op_b); - break; - default: - break; - } - ctx.OpReturnValue(result); - ctx.OpFunctionEnd(); - return func; -} +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; @@ -182,12 +149,6 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { throw InvalidArgument("Invalid attribute type {}", type); } -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - std::optional AttrTypes(EmitContext& ctx, u32 index) { const AttributeType type{ctx.profile.generic_input_types.at(index)}; switch (type) { @@ -203,6 +164,164 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + 
ctx.Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + 
ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Constant(ctx.U32[1], static_cast(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? 
cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -226,6 +345,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineInterfaces(program.info); DefineLocalMemory(program); DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); @@ -263,56 +383,6 @@ Id EmitContext::Def(const IR::Value& value) { } } -Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { - const Id loop_header{OpLabel()}; - const Id continue_block{OpLabel()}; - const Id merge_block{OpLabel()}; - const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type - : storage_memory_u32}; - const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; - const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id index{OpFunctionParameter(U32[1])}; - const Id op_b{OpFunctionParameter(value_type)}; - const Id base{OpFunctionParameter(storage_type)}; - AddLabel(); - const Id one{Constant(U32[1], 1)}; - OpBranch(loop_header); - AddLabel(loop_header); - OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); - OpBranch(continue_block); - - AddLabel(continue_block); - const Id word_pointer{pointer_type == CasPointerType::Shared - ? 
OpAccessChain(shared_u32, base, index) - : OpAccessChain(storage_u32, base, u32_zero_value, index)}; - if (value_type.value == F32[2].value) { - const Id u32_value{OpLoad(U32[1], word_pointer)}; - const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; - const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; - const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, u32_new_value, u32_value)}; - const Id success{OpIEqual(U1, atomic_res, u32_value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); - } else { - const Id value{OpLoad(U32[1], word_pointer)}; - const Id new_value{OpBitcast( - U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, new_value, value)}; - const Id success{OpIEqual(U1, atomic_res, value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpBitcast(value_type, atomic_res)); - } - OpFunctionEnd(); - return func; -} - void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -397,27 +467,31 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { Decorate(variable, spv::Decoration::Aliased); interfaces.push_back(variable); - return std::make_pair(variable, element_pointer); + return std::make_tuple(variable, element_pointer, pointer); }}; if (profile.support_explicit_workgroup_layout) { AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); if (program.info.uses_int8) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); } if (program.info.uses_int16) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); } - std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); + } + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); @@ -463,13 +537,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { if (program.info.uses_shared_increment) { - const Id 
inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } if (program.info.uses_shared_decrement) { - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } } @@ -628,21 +705,24 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { return; } if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); } if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); - DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); } if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); } if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); } if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstantBuffers(info, &UniformDefinitions::U32x2, binding, U32[2], 'u', sizeof(u64)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); } for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; @@ -655,75 +735,83 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } AddExtension("SPV_KHR_storage_buffer_storage_class"); - const Id array_type{TypeRuntimeArray(U32[1])}; - Decorate(array_type, spv::Decoration::ArrayStride, 4U); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, "ssbo_block"); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); - storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - - u32 index{}; + if (True(info.used_storage_buffer_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (True(info.used_storage_buffer_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, 
storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(info.used_storage_buffer_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(info.used_storage_buffer_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", index)); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - std::fill_n(ssbos.data() + index, desc.count, id); - index += desc.count; binding += desc.count; } - if (info.uses_global_increment) { + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_global_decrement) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f32_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; - f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f16x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; - f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; - 
f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; - f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; - f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; - f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; - f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } } @@ -903,36 +991,6 @@ void EmitContext::DefineInputs(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, - u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; - uniform_types.*member_type = uniform_type; - - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", desc.index)); - for (size_t i = 0; i < desc.count; ++i) { - cbufs[desc.index + i].*member_type = id; - } - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 98a9140bf..cade1fa0d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ 
-50,6 +50,35 @@ struct UniformDefinitions { Id U32x2{}; }; +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); @@ -78,12 +107,14 @@ public: Id f32_zero_value{}; UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; Id private_u32{}; Id shared_u8{}; Id shared_u16{}; Id shared_u32{}; + Id shared_u64{}; Id shared_u32x2{}; Id shared_u32x4{}; @@ -93,14 +124,11 @@ public: Id output_f32{}; - Id storage_u32{}; - Id storage_memory_u32{}; - Id image_buffer_type{}; Id sampled_texture_buffer_type{}; std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; - std::array<Id, Info::MAX_SSBOS> ssbos{}; + std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; std::vector<Id> texture_buffers; std::vector<TextureDefinition> textures; std::vector<ImageDefinition> images; @@ -136,8 +164,10 @@ public: Id shared_memory_u8{}; Id shared_memory_u16{}; Id shared_memory_u32{}; + Id shared_memory_u64{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u32_type{}; Id shared_store_u8_func{}; @@ -167,16 +197,12 @@ public: std::vector<Id> interfaces; private: - enum class CasPointerType { - Shared, - Ssbo, - }; - void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); @@ -185,13 +211,8 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); - void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, - Id type, char type_char, u32 element_size); - void DefineInputs(const Info& info); void DefineOutputs(const Info& info); - - [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d7c5890ab..61a2018d7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -276,7 +276,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } - if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { ctx.AddCapability(spv::Capability::Int64Atomics); } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index c0e1b8833..55b2edba0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -89,17 +89,21 @@ void EmitWriteGlobalS16(EmitContext& 
ctx); void EmitWriteGlobal32(EmitContext& ctx); void EmitWriteGlobal64(EmitContext& ctx); void EmitWriteGlobal128(EmitContext& ctx); -void EmitLoadStorageU8(EmitContext& ctx); -void EmitLoadStorageS8(EmitContext& ctx); -void EmitLoadStorageU16(EmitContext& ctx); -void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx); -void EmitWriteStorageS8(EmitContext& ctx); -void EmitWriteStorageU16(EmitContext& ctx); -void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 03d891419..aab32dc52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -6,11 +6,12 @@ namespace Shader::Backend::SPIRV { namespace { - -Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Constant(ctx.U32[1], index_offset)); + } return ctx.profile.support_explicit_workgroup_layout ? 
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); @@ -30,340 +31,258 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 index_offset = 0) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; - return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); } -std::pair GetAtomicArgs(EmitContext& ctx) { +std::pair AtomicArgs(EmitContext& ctx) { const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; const Id semantics{ctx.u32_zero_value}; return {scope, semantics}; } -Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { - const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; - const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; - const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; - return ctx.OpBitcast(ctx.U64, original_composite); +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } -void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { - const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; - ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); - ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, 
ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, result); + return original_value; } } // Anonymous namespace -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); } -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); } -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); } -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); } -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); } -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); } -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); } -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, 
semantics, value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); } -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); } -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); } -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Constant(ctx.U32[1], 3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); } Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); } Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& 
binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); } Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); } Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); } Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); } Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); } Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); } Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); } Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return 
ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); } Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); } Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); } Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); } Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); } Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id 
pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); } Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); } Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); } Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); } Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + const Id pointer{StoragePointer(ctx, 
ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; } Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); } Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -371,7 +290,7 @@ Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -379,7 +298,7 @@ Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -387,7 +306,7 @@ Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -395,7 +314,7 @@ Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; return 
ctx.OpBitcast(ctx.U32[1], result); @@ -403,7 +322,7 @@ Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 088bd3059..a8f2ea5a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,29 +22,29 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id EmitLoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 num_components) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - std::array components; - for (u32 element = 0; element < num_components; ++element) { - Id index{base_index}; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - components[element] = ctx.OpLoad(ctx.U32[1], pointer); - } - if (num_components == 1) { - return components[0]; - } else { - const std::span components_span(components.data(), num_components); - return ctx.OpCompositeConstruct(ctx.U32[num_components], components_span); - } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + return ctx.OpLoad(result_type, pointer); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + ctx.OpStore(pointer, value); } } // Anonymous namespace @@ -104,92 +104,85 @@ void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitLoadStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); } -void EmitLoadStorageS8(EmitContext&) { - throw 
NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); } -void EmitLoadStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); } -void EmitLoadStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 1); + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 2); + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 4); + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } -void EmitWriteStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); } -void EmitWriteStorageS8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); } -void EmitWriteStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); } -void EmitWriteStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, 
value); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; - const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; - const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; - ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); - ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - for (u32 element = 0; element < 4; ++element) { - Id index = base_index; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); - } + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ab529e86d..116d93c1c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -315,6 +315,23 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: info.uses_int64 = true; break; default: @@ -457,46 +474,91 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: 
+ case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; case IR::Opcode::SharedAtomicDec32: info.uses_shared_decrement = true; break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_increment = true; break; case IR::Opcode::GlobalAtomicDec32: case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_decrement = true; break; case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32_add = true; break; case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_add = true; break; case IR::Opcode::GlobalAtomicAddF32x2: case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_add = true; break; case IR::Opcode::GlobalAtomicMinF16x2: case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_min = true; break; case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_min = true; break; case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_max = true; break; case IR::Opcode::GlobalAtomicMaxF32x2: case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; case IR::Opcode::GlobalAtomicIAdd64: @@ -516,11 +578,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: - info.uses_64_bit_atomics = true; - break; - case IR::Opcode::SharedAtomicExchange64: - info.uses_64_bit_atomics = true; - info.uses_shared_memory_u32x2 = true; + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; break; default: break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 6a51aabb5..15cf09c3d 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -141,10 +141,10 @@ struct Info { bool uses_atomic_f32x2_add{}; bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; - bool uses_64_bit_atomics{}; - bool uses_shared_memory_u32x2{}; + bool uses_int64_bit_atomics{}; IR::Type 
used_constant_buffer_types{}; + IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 911dfed44..87cfe6312 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -44,6 +44,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -313,6 +314,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, .pNext = nullptr, @@ -399,6 +408,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; if (khr_workgroup_memory_explicit_layout) { workgroup_layout = { @@ -624,9 +644,13 @@ void Device::CheckSuitability(bool requires_swapchain) const { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; demote.pNext = nullptr; + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &demote; + robustness2.pNext = &variable_pointers; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -654,6 +678,9 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), From 09165ae18989c17661faf188e6825a9eb4e03a27 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 06:11:18 -0300 Subject: [PATCH 228/770] shader: Document and relax cache control on surface instructions --- 
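Note on the relaxation below: the SULD/SUST cache bits are PTX's cache operators, and CUDA exposes the same hints as load/store intrinsics, which is a convenient way to sanity-check their semantics. A minimal CUDA sketch, illustrative only and assuming the documented __ldcg/__ldcs/__ldcv and __stcg/__stcs/__stwt cache-hint intrinsics available on recent GPU architectures:

    // Each access maps to the PTX cache operator named in the comment.
    __device__ unsigned cache_hint_examples(const unsigned* src, unsigned* dst, unsigned v) {
        const unsigned ca = *src;        // ld.ca: default, cache at all levels
        const unsigned cg = __ldcg(src); // ld.cg: cache at L2 and below, bypass L1
        const unsigned cs = __ldcs(src); // ld.cs: streaming, mark lines evict-first
        const unsigned cv = __ldcv(src); // ld.cv: treat as volatile, re-fetch every time
        *dst = v;                        // st.wb: default write-back
        __stcg(dst, v);                  // st.cg: write to L2, bypass L1
        __stcs(dst, v);                  // st.cs: streaming store
        __stwt(dst, v);                  // st.wt: write-through
        return ca + cg + cs + cv;
    }

CA and CG loads (and WB and CG stores) differ only in cache residency, not in observable results, which is why the checks below can accept CG alongside the defaults instead of throwing.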
.../translate/impl/surface_load_store.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 9a2d16a6e..e1b8aa8ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -61,18 +61,19 @@ enum class Clamp : u64 { TRAP, }; +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators enum class LoadCache : u64 { - Default, - CG, - CI, - CV, + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (volatile) }; enum class StoreCache : u64 { - Default, - CG, - CS, - WT, + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) }; ImageFormat Format(Size size) { @@ -188,7 +189,7 @@ void TranslatorVisitor::SULD(u64 insn) { if (suld.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", suld.clamp.Value()); } - if (suld.cache != LoadCache::Default) { + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { throw NotImplementedException("Cache {}", suld.cache.Value()); } const bool is_typed{suld.d != 0}; @@ -248,7 +249,7 @@ void TranslatorVisitor::SUST(u64 insn) { if (sust.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", sust.clamp.Value()); } - if (sust.cache != StoreCache::Default) { + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { throw NotImplementedException("Cache {}", sust.cache.Value()); } const bool is_typed{sust.d != 0}; From a83579b50a167ab9483e5058fd1c748018ef6d7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 16:56:22 -0300 Subject: [PATCH 229/770] shader: Implement early Z tests --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 3 files changed, 5 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 61a2018d7..7ad00c434 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -178,6 +178,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } + if (ctx.profile.force_early_z) { + ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } break; default: throw NotImplementedException("Stage {}", program.stage); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 06f1f59bd..919bec4e2 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -53,6 +53,7 @@ struct Profile { std::array generic_input_types{}; bool convert_depth_mode{}; + bool force_early_z{}; InputTopology input_topology{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f49add208..8a59a2611 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ 
-934,6 +934,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, profile.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + profile.force_early_z = key.state.early_z != 0; return profile; } From b126987c59964d81ae3705ad7ad6c0ace8714e19 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 01:04:59 -0300 Subject: [PATCH 230/770] shader: Implement transform feedbacks and define file format --- .../backend/spirv/emit_context.cpp | 54 ++++++-- .../backend/spirv/emit_context.h | 8 +- .../backend/spirv/emit_spirv.cpp | 3 + .../spirv/emit_spirv_context_get_set.cpp | 19 ++- .../backend/spirv/emit_spirv_special.cpp | 29 ++++- .../frontend/ir/attribute.cpp | 7 ++ src/shader_recompiler/frontend/ir/attribute.h | 2 + src/shader_recompiler/profile.h | 10 ++ .../renderer_vulkan/fixed_pipeline_state.cpp | 19 ++- .../renderer_vulkan/fixed_pipeline_state.h | 26 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 118 +++++++++++++++++- 11 files changed, 272 insertions(+), 23 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index df53e58a8..74c42233d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -135,6 +135,45 @@ Id DefineOutput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } +void DefineGenericOutput(EmitContext& ctx, size_t index) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!ctx.profile.xfb_varyings.empty()) { + xfb_varying = &ctx.profile.xfb_varyings[base_attr_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? 
xfb_varying->components : remainder}; + + const Id id{DefineOutput(ctx, ctx.F32[num_components])}; + ctx.Decorate(id, spv::Decoration::Location, static_cast(index)); + if (element > 0) { + ctx.Decorate(id, spv::Decoration::Component, element); + } + if (xfb_varying) { + ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); + ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); + ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + } + if (num_components < 4 || element > 0) { + ctx.Name(id, fmt::format("out_attr{}", index)); + } else { + const std::string_view subswizzle{swizzle.substr(element, num_components)}; + ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } + const GenericElementInfo info{ + .id = id, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(ctx.output_generics[index].begin(), num_components, info); + element += num_components; + } +} + Id GetAttributeType(EmitContext& ctx, AttributeType type) { switch (type) { case AttributeType::Float: @@ -663,12 +702,15 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); i++) { + for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (!info.stores_generics[i]) { continue; } + if (output_generics[i][0].num_components != 4) { + throw NotImplementedException("Physical stores and transform feedbacks"); + } AddLabel(labels[label_index]); - const Id generic_id{output_generics.at(i)}; + const Id generic_id{output_generics[i][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); @@ -1015,11 +1057,9 @@ void EmitContext::DefineOutputs(const Info& info) { } viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (info.stores_generics[i]) { - output_generics[i] = DefineOutput(*this, F32[4]); - Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); - Name(output_generics[i], fmt::format("out_attr{}", i)); + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { + DefineGenericOutput(*this, index); } } if (stage == Stage::Fragment) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index cade1fa0d..b27e5540c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -79,6 +79,12 @@ struct StorageDefinitions { Id U32x4{}; }; +struct GenericElementInfo { + Id id{}; + u32 first_element{}; + u32 num_components{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); @@ -189,7 +195,7 @@ public: Id output_point_size{}; Id output_position{}; - std::array output_generics{}; + std::array, 32> output_generics{}; std::array frag_color{}; Id frag_depth{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7ad00c434..444ba276f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -288,6 +288,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if 
(!ctx.profile.xfb_varyings.empty()) { + ctx.AddCapability(spv::Capability::TransformFeedback); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index a91b4c212..f9c151a5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -40,11 +40,17 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { - const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + if (info.num_components == 1) { + return info.id; + } else { + const u32 index_element{element - info.first_element}; + const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; + return ctx.OpAccessChain(ctx.output_f32, info.id, index_id); + } } switch (attr) { case IR::Attribute::PointSize: @@ -52,8 +58,11 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + case IR::Attribute::PositionW: { + const u32 element{static_cast(attr) % 4}; + const Id element_id{ctx.Constant(ctx.U32[1], element)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fee740c08..7af29e4dd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -22,6 +22,21 @@ void SetFixedPipelinePointSize(EmitContext& ctx) { ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); } } + +Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, + Id default_vector) { + switch (num_components) { + case 1: + return element == 3 ? one : zero; + case 2: + return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); + case 3: + return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? 
one : zero); + case 4: + return default_vector; + } + throw InvalidArgument("Bad element"); +} } // Anonymous namespace void EmitPrologue(EmitContext& ctx) { @@ -30,9 +45,17 @@ void EmitPrologue(EmitContext& ctx) { const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; ctx.OpStore(ctx.output_position, default_vector); - for (const Id generic_id : ctx.output_generics) { - if (Sirit::ValidId(generic_id)) { - ctx.OpStore(generic_id, default_vector); + for (const auto& info : ctx.output_generics) { + if (info[0].num_components == 0) { + continue; + } + u32 element{0}; + while (element < 4) { + const auto& element_info{info[element]}; + const u32 num{element_info.num_components}; + const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; + ctx.OpStore(element_info.id, value); + element += num; } } } diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7993e5c43..4d0b8b8e5 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -20,6 +20,13 @@ u32 GenericAttributeIndex(Attribute attribute) { return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } +u32 GenericAttributeElement(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return static_cast(attribute) % 4; +} + std::string NameOf(Attribute attribute) { switch (attribute) { case Attribute::PrimitiveId: diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 34ec7e0cd..8bf2ddf30 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -226,6 +226,8 @@ enum class Attribute : u64 { [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeElement(Attribute attribute); + [[nodiscard]] std::string NameOf(Attribute attribute); } // namespace Shader::IR diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 919bec4e2..5ecae71b9 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "common/common_types.h" @@ -26,6 +27,13 @@ enum class InputTopology { TrianglesAdjacency, }; +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + struct Profile { u32 supported_spirv{0x00010000}; @@ -58,6 +66,8 @@ struct Profile { InputTopology input_topology{}; std::optional fixed_state_point_size; + + std::vector xfb_varyings; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d8f683907..6a3baf837 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -52,6 +52,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; + no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 
1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -113,10 +115,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (no_extended_dynamic_state != 0) { dynamic_state.Refresh(regs); } + if (xfb_enabled != 0) { + xfb_state.Refresh(regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { @@ -158,6 +162,17 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } +void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { + return Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + varyings = regs.tfb_varying_locs; +} + void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 348f1d6ce..5568c4f72 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -130,6 +130,18 @@ struct FixedPipelineState { } }; + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + + void Refresh(const Maxwell& regs); + }; + struct DynamicState { union { u32 raw1; @@ -168,6 +180,7 @@ struct FixedPipelineState { union { u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; + BitField<1, 1, u32> xfb_enabled; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> depth_bias_enable; BitField<4, 1, u32> depth_clamp_disabled; @@ -199,6 +212,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; + TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); @@ -211,8 +225,16 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 
0 : sizeof(DynamicState)); + if (xfb_enabled != 0) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } else if (no_extended_dynamic_state != 0) { + // Dynamic state is enabled, we can enable more + return offsetof(FixedPipelineState, xfb_state); + } else { + // No XFB, extended dynamic state enabled + return offsetof(FixedPipelineState, dynamic_state); + } } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8a59a2611..de52d0f30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -248,6 +248,10 @@ namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +// TODO: Move this to a separate file +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION{1}; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; @@ -379,13 +383,14 @@ void SerializePipeline(const Key& key, const Envs& envs, const std::string& file try { std::ofstream file; file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); if (!file.is_open()) { LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); return; } if (file.tellp() == 0) { - // Write header... + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); } const std::span key_span(reinterpret_cast(&key), sizeof(key)); SerializePipeline(key_span, MakeSpan(envs), file); @@ -520,8 +525,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading file.exceptions(std::ifstream::failbit); const auto end{file.tellg()}; file.seekg(0, std::ios::beg); - // Read header... 
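// On-disk layout implied by the header reads below (a sketch assuming only the
// MAGIC_NUMBER and CACHE_VERSION constants defined earlier in this diff):
//
//   offset 0:  char[8] magic "yuzucach"
//   offset 8:  u32     version (CACHE_VERSION, currently 1)
//   offset 12: serialized (key, environments) pipeline entries, appended
//              back-to-back until end of file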
+ std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::Delete(pipeline_cache_filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Render_Vulkan, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + pipeline_cache_filename); + } + return; + } while (file.tellg() != end) { if (stop_loading) { return; @@ -879,6 +903,88 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +static std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; @@ -893,6 +999,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } 
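// Sketch of what MakeTransformFeedbackVaryings returns (values illustrative,
// derived from the VECTORS table above rather than a real capture): the vector
// is indexed by varying location, so a shader streaming out all of generic
// attribute 0 merges locations 32..35 into a single 4-component entry:
//
//   Shader::TransformFeedbackVarying example{
//       .buffer = 0,     // taken from layout.stream of TFB buffer 0
//       .stride = 16,    // byte stride between vertices in that buffer
//       .offset = 0,     // first varying begins at byte offset 0
//       .components = 4, // consecutive locations 32..35 collapsed into a vec4
//   };
//   // xfb[32] = example; locations 33..35 keep default-initialized entries.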
profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), @@ -902,6 +1011,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; break; default: From 6c512f4bffde6bd8e4dbc74ed27cc84cd7fffadb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 14 Apr 2021 00:32:18 -0400 Subject: [PATCH 231/770] spirv: Implement alpha test --- .../backend/spirv/emit_spirv_special.cpp | 45 +++++++++++++++++++ src/shader_recompiler/profile.h | 15 ++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 36 +++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 7af29e4dd..8bb94f546 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -37,6 +37,48 @@ Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id } throw InvalidArgument("Bad element"); } + +Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { + switch (comparison) { + case CompareFunction::Never: + return ctx.false_value; + case CompareFunction::Less: + return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); + case CompareFunction::Equal: + return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::LessThanEqual: + return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Greater: + return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); + case CompareFunction::NotEqual: + return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::GreaterThanEqual: + return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Always: + return ctx.true_value; + } + throw InvalidArgument("Comparison function {}", comparison); +} + +void AlphaTest(EmitContext& ctx) { + const auto comparison{*ctx.profile.alpha_test_func}; + if (comparison == CompareFunction::Always) { + return; + } + const Id type{ctx.F32[1]}; + const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; + const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; + + const Id true_label{ctx.OpLabel()}; + const Id discard_label{ctx.OpLabel()}; + const Id alpha_reference{ctx.Constant(ctx.F32[1], ctx.profile.alpha_test_reference)}; + const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + + ctx.OpBranchConditional(condition, true_label, discard_label); + ctx.AddLabel(discard_label); + ctx.OpKill(); + ctx.AddLabel(true_label); +} } // Anonymous namespace void EmitPrologue(EmitContext& ctx) { @@ -68,6 +110,9 @@ void EmitEpilogue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { ConvertDepthMode(ctx); } + if (ctx.stage == Stage::Fragment) { + AlphaTest(ctx); + } } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 5ecae71b9..c26017d75 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,8 +5,8 @@ #pragma once #include -#include #include 
+#include #include "common/common_types.h" @@ -27,6 +27,17 @@ enum class InputTopology { TrianglesAdjacency, }; +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + struct TransformFeedbackVarying { u32 buffer{}; u32 stride{}; @@ -66,6 +77,8 @@ struct Profile { InputTopology input_topology{}; std::optional fixed_state_point_size; + std::optional alpha_test_func; + float alpha_test_reference{}; std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index de52d0f30..80f196d0e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -492,6 +492,37 @@ private: u32 read_lowest{}; u32 read_highest{}; }; + +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; +} } // Anonymous namespace void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -1016,6 +1047,11 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, } profile.convert_depth_mode = gl_ndc; break; + case Shader::Stage::Fragment: + profile.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; default: break; } From 6f4a1c8dcfb1d54555b139f1d76ac9157ff49d42 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 14 Apr 2021 01:17:47 -0400 Subject: [PATCH 232/770] spirv: Fix non-atomic 64-bit store --- src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index aab32dc52..c2c879a6c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -77,7 +77,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& binding, offset, sizeof(u32[2]))}; const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; - ctx.OpStore(pointer, result); + ctx.OpStore(pointer, 
ctx.OpBitcast(ctx.U32[2], result)); return original_value; } } // Anonymous namespace From ab3831f6cb87d7f0a337cef6ecb4b1f63bfb0bb5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 08:00:41 +0200 Subject: [PATCH 233/770] spirv: Fix alpha test --- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 8bb94f546..ae8b39f41 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -65,6 +65,10 @@ void AlphaTest(EmitContext& ctx) { if (comparison == CompareFunction::Always) { return; } + if (!Sirit::ValidId(ctx.frag_color[0])) { + return; + } + const Id type{ctx.F32[1]}; const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; @@ -74,6 +78,7 @@ void AlphaTest(EmitContext& ctx) { const Id alpha_reference{ctx.Constant(ctx.F32[1], ctx.profile.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); ctx.OpBranchConditional(condition, true_label, discard_label); ctx.AddLabel(discard_label); ctx.OpKill(); From d8ec99dadaa033aa440671572ed38e2614815e11 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 18:09:18 -0300 Subject: [PATCH 234/770] spirv: Implement Layer stores --- .../backend/spirv/emit_context.cpp | 9 ++++++++- .../backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv.cpp | 16 ++++++++++------ .../backend/spirv/emit_spirv_context_get_set.cpp | 9 +++++++-- .../ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/shader_info.h | 1 + 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 74c42233d..f96d5ae37 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1050,8 +1050,15 @@ void EmitContext::DefineOutputs(const Info& info) { const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); } + if (info.stores_layer && + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing Layer in fragment stage"); + } + layer = DefineOutput(*this, U32[1], spv::BuiltIn::Layer); + } if (info.stores_viewport_index && - (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) { + (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index b27e5540c..1f0d8be77 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -157,6 +157,7 @@ public: Id front_face{}; Id point_coord{}; Id clip_distances{}; + Id layer{}; Id viewport_index{}; Id fswzadd_lut_a{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 444ba276f..3bf4c6a9e 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -124,17 +124,17 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; switch (program.stage) { - case Shader::Stage::Compute: { + case Stage::Compute: { const std::array workgroup_size{program.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); break; } - case Shader::Stage::VertexB: + case Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; - case Shader::Stage::Geometry: + case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::GeometryStreams); @@ -172,7 +172,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); break; - case Shader::Stage::Fragment: + case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); if (program.info.stores_frag_depth) { @@ -258,10 +258,14 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } + if (info.stores_layer) { + ctx.AddCapability(spv::Capability::ShaderLayer); + } if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); - if (profile.support_viewport_index_layer_non_geometry && - ctx.stage != Shader::Stage::Geometry) { + } + if (info.stores_layer || info.stores_viewport_index) { + if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f9c151a5c..59c56c5ba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -76,9 +76,14 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id clip_num{ctx.Constant(ctx.U32[1], index)}; return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); } + case IR::Attribute::Layer: + return ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry + ? std::optional{ctx.layer} + : std::nullopt; case IR::Attribute::ViewportIndex: - return (ctx.profile.support_viewport_index_layer_non_geometry || - ctx.stage == Shader::Stage::Geometry) + return ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry ? 
std::optional{ctx.viewport_index} : std::nullopt; default: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 116d93c1c..617ec05ce 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -83,6 +83,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; + case IR::Attribute::Layer: + info.stores_layer = true; + break; case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 15cf09c3d..e078b0fa1 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -109,6 +109,7 @@ struct Info { bool stores_position{}; bool stores_point_size{}; bool stores_clip_distance{}; + bool stores_layer{}; bool stores_viewport_index{}; bool stores_indexed_attributes{}; From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: [PATCH 235/770] spirv: Implement image buffers --- .../backend/spirv/emit_context.cpp | 70 +++++++++++++------ .../backend/spirv/emit_context.h | 7 ++ .../backend/spirv/emit_spirv_image.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 19 ++++- src/shader_recompiler/shader_info.h | 10 +++ src/video_core/buffer_cache/buffer_cache.h | 24 +++++-- .../renderer_vulkan/pipeline_helper.h | 2 + .../renderer_vulkan/vk_compute_pipeline.cpp | 25 ++++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 ++++---- 9 files changed, 142 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index f96d5ae37..032cf5e03 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -54,28 +54,30 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { throw InvalidArgument("Invalid texture type {}", desc.type); } +spv::ImageFormat GetImageFormat(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", format); +} + Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { - const spv::ImageFormat format{[&] { - switch (desc.format) { - case ImageFormat::Typeless: - return spv::ImageFormat::Unknown; - case ImageFormat::R8_UINT: - return spv::ImageFormat::R8ui; - case ImageFormat::R8_SINT: - return spv::ImageFormat::R8i; - case ImageFormat::R16_UINT: - return spv::ImageFormat::R16ui; - case ImageFormat::R16_SINT: - return spv::ImageFormat::R16i; - case ImageFormat::R32_UINT: - return spv::ImageFormat::R32ui; - case ImageFormat::R32G32_UINT: - return spv::ImageFormat::Rg32ui; - case ImageFormat::R32G32B32A32_UINT: - return spv::ImageFormat::Rgba32ui; - } - throw InvalidArgument("Invalid image format {}", 
desc.format); - }()}; + const spv::ImageFormat format{GetImageFormat(desc.format)}; const Id type{ctx.U32[1]}; switch (desc.type) { case TextureType::Color1D: @@ -388,6 +390,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); + DefineImageBuffers(program.info, binding); DefineTextures(program.info, binding); DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); @@ -883,6 +886,31 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of image buffers"); + } + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id image_type{TypeImage(U32[4], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("imgbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + const ImageBufferDefinition def{ + .id = id, + .image_type = image_type, + }; + image_buffers.insert(image_buffers.end(), desc.count, def); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 1f0d8be77..0da14d5f8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -35,6 +35,11 @@ struct TextureDefinition { Id image_type; }; +struct ImageBufferDefinition { + Id id; + Id image_type; +}; + struct ImageDefinition { Id id; Id image_type; @@ -136,6 +141,7 @@ public: std::array cbufs{}; std::array ssbos{}; std::vector texture_buffers; + std::vector image_buffers; std::vector textures; std::vector images; @@ -213,6 +219,7 @@ private: void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); + void DefineImageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 17266ce77..c8d1d25b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -149,7 +149,8 @@ Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { throw NotImplementedException("Indirect image indexing"); } if (info.type == TextureType::Buffer) { - throw NotImplementedException("Image buffer"); + const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + return 
ctx.OpLoad(def.image_type, def.id); } else { const ImageDefinition def{ctx.images.at(index.U32())}; return ctx.OpLoad(def.image_type, def.id); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index a7b1fcfad..e1d5a2ce1 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -158,9 +158,11 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + ImageBufferDescriptors& image_buffer_descriptors_, TextureDescriptors& texture_descriptors_, ImageDescriptors& image_descriptors_) : texture_buffer_descriptors{texture_buffer_descriptors_}, + image_buffer_descriptors{image_buffer_descriptors_}, texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} u32 Add(const TextureBufferDescriptor& desc) { @@ -170,6 +172,13 @@ public: }); } + u32 Add(const ImageBufferDescriptor& desc) { + return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } + u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.cbuf_index == existing.cbuf_index && @@ -200,6 +209,7 @@ private: } TextureBufferDescriptors& texture_buffer_descriptors; + ImageBufferDescriptors& image_buffer_descriptors; TextureDescriptors& texture_descriptors; ImageDescriptors& image_descriptors; }; @@ -224,6 +234,7 @@ void TexturePass(Environment& env, IR::Program& program) { }); Descriptors descriptors{ program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, program.info.texture_descriptors, program.info.image_descriptors, }; @@ -261,7 +272,13 @@ void TexturePass(Environment& env, IR::Program& program) { case IR::Opcode::ImageWrite: { const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { - throw NotImplementedException("Image buffer"); + index = descriptors.Add(ImageBufferDescriptor{ + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); } else { index = descriptors.Add(ImageDescriptor{ .type = flags.type, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e078b0fa1..336c6131a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -67,6 +67,15 @@ struct TextureBufferDescriptor { }; using TextureBufferDescriptors = boost::container::small_vector; +struct ImageBufferDescriptor { + ImageFormat format; + bool is_written; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using ImageBufferDescriptors = boost::container::small_vector; + struct TextureDescriptor { TextureType type; bool is_depth; @@ -153,6 +162,7 @@ struct Info { constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; + ImageBufferDescriptors image_buffer_descriptors; TextureDescriptors texture_descriptors; ImageDescriptors image_descriptors; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6701aab82..29746f61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ 
-154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + PixelFormat format, bool is_written); void UnbindComputeStorageBuffers(); @@ -163,8 +163,8 @@ public: void UnbindComputeTextureBuffers(); - void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written); void FlushCachedWrites(); @@ -393,7 +393,9 @@ private: u32 written_compute_storage_buffers = 0; std::array enabled_texture_buffers{}; + std::array written_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -700,12 +702,14 @@ void BufferCache
<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, template <class P> void BufferCache
<P>::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; } template <class P> void BufferCache
<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format) { + u32 size, PixelFormat format, bool is_written) { enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -732,12 +736,14 @@ void BufferCache
<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, template <class P> void BufferCache
<P>::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; } template <class P> void BufferCache
<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format) { + PixelFormat format, bool is_written) { enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1274,6 +1280,10 @@ void BufferCache
<P>::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } @@ -1343,6 +1353,10 @@ void BufferCache<P>
::UpdateComputeTextureBuffers() { ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if (((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index e2167dc4b..aaf9a735e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -88,6 +88,7 @@ public: Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } @@ -126,6 +127,7 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { image_view_ids += info.texture_buffer_descriptors.size(); + image_view_ids += info.image_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3d690f335..3c907ec5a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -97,10 +97,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -108,24 +110,29 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); buffer_cache.UnbindComputeTextureBuffers(); ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) 
{ ASSERT(desc.count == 1); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format); + image_view.format, is_written); ++texture_buffer_ids; ++index; - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 23c01f24e..84720a6f9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,10 +175,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -186,28 +188,33 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format); + image_view.BufferSize(), image_view.format, + is_written); ++index; ++texture_buffer_index; - } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); texture_buffer_index += info.texture_descriptors.size(); + texture_buffer_index += info.image_descriptors.size(); } buffer_cache.UpdateGraphicsBuffers(is_indexed); From 7ae3ea6beefae76e6671436114863fce1baacd9e Mon Sep 17 00:00:00 2001 From: lat9nq 
<22451773+lat9nq@users.noreply.github.com> Date: Thu, 15 Apr 2021 19:01:45 -0400 Subject: [PATCH 236/770] vk_pipeline_cache: Silence GCC warnings Silences `-Werror=missing-field-initializers` due to missing initializers. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 80f196d0e..ee22255bf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -696,6 +696,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, + .alpha_test_func{}, + .xfb_varyings{}, }; } From 34519d3fc637fa921c0105e01dbbc50cfacd924d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:40:30 -0300 Subject: [PATCH 237/770] shader: Mark atomic instructions as writes --- .../global_memory_to_storage_buffer_pass.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0d4f266c3..378a3a915 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -115,6 +115,33 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: return true; default: return false; From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: [PATCH 238/770] shader: Implement tessellation shaders, polygon mode and invocation id --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 147 ++++++++++++----- .../backend/spirv/emit_context.h | 10 +- .../backend/spirv/emit_spirv.cpp | 39 +++++ .../backend/spirv/emit_spirv.h | 3 + .../spirv/emit_spirv_context_get_set.cpp | 88 +++++++++-- .../frontend/ir/ir_emitter.cpp | 12 ++ .../frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + src/shader_recompiler/frontend/ir/patch.cpp | 
28 ++++ src/shader_recompiler/frontend/ir/patch.h | 149 ++++++++++++++++++ src/shader_recompiler/frontend/ir/type.h | 41 ++--- src/shader_recompiler/frontend/ir/value.cpp | 9 ++ src/shader_recompiler/frontend/ir/value.h | 4 + .../frontend/maxwell/program.cpp | 5 + .../translate/impl/load_store_attribute.cpp | 33 ++-- .../translate/impl/move_special_register.cpp | 2 + .../ir_opt/collect_shader_info_pass.cpp | 41 +++++ src/shader_recompiler/profile.h | 16 ++ src/shader_recompiler/shader_info.h | 5 + .../renderer_vulkan/maxwell_to_vk.cpp | 13 ++ .../renderer_vulkan/maxwell_to_vk.h | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 30 ++++ .../vk_staging_buffer_pool.cpp | 2 +- .../vulkan_common/vulkan_device.cpp | 3 +- 28 files changed, 605 insertions(+), 91 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/patch.cpp create mode 100644 src/shader_recompiler/frontend/ir/patch.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index bbbfa98a3..7c11d15bf 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(shader_recompiler STATIC frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc + frontend/ir/patch.cpp + frontend/ir/patch.h frontend/ir/post_order.cpp frontend/ir/post_order.h frontend/ir/pred.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 032cf5e03..067f61613 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -125,19 +125,36 @@ u32 NumVertices(InputTopology input_topology) { throw InvalidArgument("Invalid input topology {}", input_topology); } -Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { - if (ctx.stage == Stage::Geometry) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + } + break; + default: + break; } return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); } -Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { +Id DefineOutput(EmitContext& ctx, Id type, std::optional invocations, + std::optional builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations)); + } return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } -void DefineGenericOutput(EmitContext& ctx, size_t index) { +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invocations) { static constexpr std::string_view swizzle{"xyzw"}; const size_t base_attr_index{static_cast(IR::Attribute::Generic0X) + index * 4}; u32 element{0}; @@ -150,7 +167,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { } const u32 num_components{xfb_varying ? 
xfb_varying->components : remainder}; - const Id id{DefineOutput(ctx, ctx.F32[num_components])}; + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; ctx.Decorate(id, spv::Decoration::Location, static_cast(index)); if (element > 0) { ctx.Decorate(id, spv::Decoration::Component, element); @@ -161,10 +178,10 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); } if (num_components < 4 || element > 0) { - ctx.Name(id, fmt::format("out_attr{}", index)); - } else { const std::string_view subswizzle{swizzle.substr(element, num_components)}; ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); } const GenericElementInfo info{ .id = id, @@ -383,7 +400,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineInterfaces(program.info); + DefineInterfaces(program); DefineLocalMemory(program); DefineSharedMemory(program); DefineSharedMemoryFunctions(program); @@ -472,9 +489,9 @@ void EmitContext::DefineCommonConstants() { f32_zero_value = Constant(F32[1], 0.0f); } -void EmitContext::DefineInterfaces(const Info& info) { - DefineInputs(info); - DefineOutputs(info); +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program.info); + DefineOutputs(program); } void EmitContext::DefineLocalMemory(const IR::Program& program) { @@ -972,26 +989,29 @@ void EmitContext::DefineLabels(IR::Program& program) { void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { - workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } if (info.uses_local_invocation_id) { - local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } if (info.uses_is_helper_invocation) { - is_helper_invocation = DefineInput(*this, U1, spv::BuiltIn::HelperInvocation); + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } if (info.uses_subgroup_mask) { - subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR); - subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR); - subgroup_mask_le = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLeMaskKHR); - subgroup_mask_gt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGtMaskKHR); - subgroup_mask_ge = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGeMaskKHR); + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); } if (info.uses_subgroup_invocation_id || (profile.warp_size_potentially_larger_than_guest && (info.uses_subgroup_vote || info.uses_subgroup_mask))) { subgroup_local_invocation_id = - DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + 
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); } if (info.uses_fswzadd) { const Id f32_one{Constant(F32[1], 1.0f)}; @@ -1004,29 +1024,32 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], built_in); + input_position = DefineInput(*this, F32[4], true, built_in); } if (info.loads_instance_id) { if (profile.support_vertex_instance_id) { - instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId); + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { - instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex); - base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance); + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } if (info.loads_vertex_id) { if (profile.support_vertex_instance_id) { - vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId); + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { - vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex); - base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } if (info.loads_front_face) { - front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } if (info.loads_point_coord) { - point_coord = DefineInput(*this, F32[2], spv::BuiltIn::PointCoord); + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (info.loads_tess_coord) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { const InputVarying generic{info.input_generics[index]}; @@ -1038,7 +1061,7 @@ void EmitContext::DefineInputs(const Info& info) { continue; } const Id type{GetAttributeType(*this, input_type)}; - const Id id{DefineInput(*this, type)}; + const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; @@ -1059,58 +1082,98 @@ void EmitContext::DefineInputs(const Info& info) { break; } } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + } } -void EmitContext::DefineOutputs(const Info& info) { +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const std::optional invocations{program.invocations}; if (info.stores_position || stage == Stage::VertexB) { - output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } - 
output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } if (info.stores_clip_distance) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; - clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores_layer && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } - layer = DefineOutput(*this, U32[1], spv::BuiltIn::Layer); + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } if (info.stores_viewport_index && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } - viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { - DefineGenericOutput(*this, index); + DefineGenericOutput(*this, index, invocations); } } - if (stage == Stage::Fragment) { + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Constant(U32[1], 4))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Constant(U32[1], 2))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + break; + case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { if (!info.stores_frag_color[index]) { continue; } - frag_color[index] = DefineOutput(*this, F32[4]); + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); Decorate(frag_color[index], spv::Decoration::Location, index); Name(frag_color[index], fmt::format("frag_color{}", index)); } if (info.stores_frag_depth) { - frag_depth = DefineOutput(*this, F32[1]); + frag_depth = DefineOutput(*this, F32[1], std::nullopt); Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); Name(frag_depth, "frag_depth"); } + break; + default: + break; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 0da14d5f8..ba0a253b3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -147,6 +147,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; @@ -162,6 +163,7 @@ public: Id base_vertex{}; Id front_face{}; Id point_coord{}; + Id 
tess_coord{}; Id clip_distances{}; Id layer{}; Id viewport_index{}; @@ -204,6 +206,10 @@ public: Id output_position{}; std::array, 32> output_generics{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array patches{}; + std::array frag_color{}; Id frag_depth{}; @@ -212,7 +218,7 @@ public: private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineInterfaces(const Info& info); + void DefineInterfaces(const IR::Program& program); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); void DefineSharedMemoryFunctions(const IR::Program& program); @@ -226,7 +232,7 @@ private: void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); - void DefineOutputs(const Info& info); + void DefineOutputs(const IR::Program& program); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3bf4c6a9e..105602ccf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -45,6 +45,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.Label(); } else if constexpr (std::is_same_v) { return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } else if constexpr (std::is_same_v) { return arg.Reg(); } @@ -120,6 +122,30 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; @@ -134,6 +160,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); + ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? 
spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 55b2edba0..8caf30f1b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -55,6 +55,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); @@ -67,6 +69,7 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 59c56c5ba..4a1aeece5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -32,13 +32,26 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { template Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpAccessChain(pointer_type, base, vertex, std::forward(args)...); - } else { + default: return ctx.OpAccessChain(pointer_type, base, std::forward(args)...); } } +template +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... 
args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward(args)...); + } +} + std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; @@ -49,7 +62,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } else { const u32 index_element{element - info.first_element}; const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; - return ctx.OpAccessChain(ctx.output_f32, info.id, index_id); + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } switch (attr) { @@ -61,7 +74,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; const Id element_id{ctx.Constant(ctx.U32[1], element)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id); + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: @@ -74,7 +87,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 base{static_cast(IR::Attribute::ClipDistance0)}; const u32 index{static_cast(attr) - base}; const Id clip_num{ctx.Constant(ctx.U32[1], index)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: return ctx.profile.support_viewport_index_layer_non_geometry || @@ -222,11 +235,18 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { ctx.Constant(ctx.U32[1], std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.u32_zero_value)); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, + ctx.Constant(ctx.U32[1], 1U))); + default: throw NotImplementedException("Read attribute {}", attr); } @@ -240,9 +260,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_un } Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); - } else { + default: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); } } @@ -251,6 +274,45 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unus ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); } +Id EmitGetPatch(EmitContext& 
ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Constant(ctx.U32[1], index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.Constant(ctx.U32[1], 1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { const Id component_id{ctx.Constant(ctx.U32[1], component)}; const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; @@ -301,6 +363,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d66eb17a6..b821d9f47 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -331,6 +331,14 @@ void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, c Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } @@ -363,6 +371,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::InvocationId() { + return Inst(Opcode::InvocationId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e70359eb1..7f8f1ad42 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -84,6 +84,9 @@ public: [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + 
[[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); @@ -95,6 +98,7 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 InvocationId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 204c55fa8..b2d7573d9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -73,6 +73,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::EndPrimitive: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: case Opcode::SetFragColor: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7d3e0b2ab..7f04b647b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -24,6 +24,7 @@ constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 7a21fe746..a86542cd8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -48,6 +48,8 @@ OPCODE(GetAttribute, F32, Attr OPCODE(SetAttribute, Void, Attribute, F32, U32, ) OPCODE(GetAttributeIndexed, F32, U32, U32, ) OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) @@ -60,6 +62,7 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..1f770bc48 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 9a32ca1e8..8b3b33852 100644 --- 
a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -20,26 +20,27 @@ enum class Type { Reg = 1 << 2, Pred = 1 << 3, Attribute = 1 << 4, - U1 = 1 << 5, - U8 = 1 << 6, - U16 = 1 << 7, - U32 = 1 << 8, - U64 = 1 << 9, - F16 = 1 << 10, - F32 = 1 << 11, - F64 = 1 << 12, - U32x2 = 1 << 13, - U32x3 = 1 << 14, - U32x4 = 1 << 15, - F16x2 = 1 << 16, - F16x3 = 1 << 17, - F16x4 = 1 << 18, - F32x2 = 1 << 19, - F32x3 = 1 << 20, - F32x4 = 1 << 21, - F64x2 = 1 << 22, - F64x3 = 1 << 23, - F64x4 = 1 << 24, + Patch = 1 << 5, + U1 = 1 << 6, + U8 = 1 << 7, + U16 = 1 << 8, + U32 = 1 << 9, + U64 = 1 << 10, + F16 = 1 << 11, + F32 = 1 << 12, + F64 = 1 << 13, + U32x2 = 1 << 14, + U32x3 = 1 << 15, + U32x4 = 1 << 16, + F16x2 = 1 << 17, + F16x3 = 1 << 18, + F16x4 = 1 << 19, + F32x2 = 1 << 20, + F32x3 = 1 << 21, + F32x4 = 1 << 22, + F64x2 = 1 << 23, + F64x3 = 1 << 24, + F64x4 = 1 << 25, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e7ffb86d..bf5f8c0c2 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -18,6 +18,8 @@ Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} @@ -109,6 +111,11 @@ IR::Attribute Value::Attribute() const { return attribute; } +IR::Patch Value::Patch() const { + ValidateAccess(Type::Patch); + return patch; +} + bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); @@ -182,6 +189,8 @@ bool Value::operator==(const Value& other) const { return pred == other.pred; case Type::Attribute: return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; case Type::U1: return imm_u1 == other.imm_u1; case Type::U8: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index a0962863d..303745563 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -9,6 +9,7 @@ #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { @@ -24,6 +25,7 @@ public: explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -67,6 +70,7 @@ private: IR::Reg reg; IR::Pred pred; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ab67446c8..20a1d61cc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool size; } const ald{insn}; - if (ald.o != 0) { - throw NotImplementedException("O"); - } - if (ald.patch != 0) { - throw NotImplementedException("P"); - } const u64 offset{ald.absolute_offset.Value()}; if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); @@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) { const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + } } return; } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); @@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) { BitField<47, 2, Size> size; } const ast{insn}; - if (ast.patch != 0) { - throw NotImplementedException("P"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } @@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) { const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + element)); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + } } return; } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index bc822d585..660b84c20 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 617ec05ce..aadcf7999 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -53,6 +53,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PointSpriteT: info.loads_point_coord = true; 
break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + info.loads_tess_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -94,6 +98,34 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } } +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -350,6 +382,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetAttribute: SetAttribute(info, inst.Arg(0).Attribute()); break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; case IR::Opcode::GetAttributeIndexed: info.loads_indexed_attributes = true; break; @@ -368,6 +406,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c26017d75..3a04f075e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,18 @@ enum class CompareFunction { Always, }; +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + struct TransformFeedbackVarying { u32 buffer{}; u32 stride{}; @@ -74,6 +86,10 @@ struct Profile { bool convert_depth_mode{}; bool force_early_z{}; + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + InputTopology input_topology{}; std::optional fixed_state_point_size; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 336c6131a..4dbf9ed12 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -101,8 +101,10 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + bool uses_invocation_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; + std::array uses_patches{}; std::array input_generics{}; bool loads_position{}; @@ -110,6 +112,7 @@ struct Info { bool loads_vertex_id{}; bool loads_front_face{}; bool loads_point_coord{}; + bool loads_tess_coord{}; bool loads_indexed_attributes{}; std::array stores_frag_color{}; @@ -120,6 +123,8 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; bool uses_fp16{}; diff --git 
a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index dc4ff0da2..8f0b0b8ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 9f78e15b6..50a599c11 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 84720a6f9..d5e9dae0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ee22255bf..0bccc640a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... 
+ profile.tess_clockwise = key.state.tessellation_clockwise == 0; + profile.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + profile.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; case Shader::Stage::Geometry: if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 87cfe6312..f0de19ba1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -225,7 +225,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, + .fillModeNonSolid = true, .depthBounds = false, .wideLines = false, .largePoints = true, @@ -670,6 +670,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), From b0f1255c8cb800e9f336be66b3f16c3d958673d2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 01:55:06 -0300 Subject: [PATCH 239/770] shader: Implement PrimitiveId --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 +++ src/shader_recompiler/backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 2 ++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/shader_info.h | 1 + 5 files changed, 10 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 
067f61613..3946dab14 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1021,6 +1021,9 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } + if (info.loads_primitive_id) { + primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index ba0a253b3..c7d6f8a38 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -167,6 +167,7 @@ public: Id clip_distances{}; Id layer{}; Id viewport_index{}; + Id primitive_id{}; Id fswzadd_lut_a{}; Id fswzadd_lut_b{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4a1aeece5..23a74f966 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -210,6 +210,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } switch (attr) { + case IR::Attribute::PrimitiveId: + return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index aadcf7999..c84bf211f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -34,6 +34,9 @@ void GetAttribute(Info& info, IR::Attribute attribute) { return; } switch (attribute) { + case IR::Attribute::PrimitiveId: + info.loads_primitive_id = true; + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4dbf9ed12..d6cde1596 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -107,6 +107,7 @@ struct Info { std::array uses_patches{}; std::array input_generics{}; + bool loads_primitive_id{}; bool loads_position{}; bool loads_instance_id{}; bool loads_vertex_id{}; From 4657cf78fd44c8f205cdbab73f1929e31df776fa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 02:33:46 -0300 Subject: [PATCH 240/770] spirv: Bitcast non-F32 attributes to F32 --- .../backend/spirv/emit_spirv_context_get_set.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 23a74f966..f3de577f6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -211,7 +211,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { } switch (attr) { case IR::Attribute::PrimitiveId: - return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + return ctx.OpBitcast(ctx.F32[1], 
ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: @@ -220,17 +220,19 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { - return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); } else { - return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index), - ctx.OpLoad(ctx.U32[1], ctx.base_instance)); + const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::VertexId: if (ctx.profile.support_vertex_instance_id) { - return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); + return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); } else { - return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), - ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); + const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; + return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); } case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: [PATCH 241/770] spirv: Implement ViewportMask with NV_viewport_array2 --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_context.h | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 ++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 5 +++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 10 files changed, 32 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3946dab14..2f8678b4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -457,6 +457,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); if (info.uses_int8) { AddCapability(spv::Capability::Int8); @@ -1131,6 +1132,9 @@ void EmitContext::DefineOutputs(const IR::Program& program) { } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Constant(U32[1], 1u)), std::nullopt); + } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { DefineGenericOutput(*this, index, invocations); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 
c7d6f8a38..c41cad098 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -134,6 +134,7 @@ public: Id input_s32{}; Id output_f32{}; + Id output_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -167,6 +168,7 @@ public: Id clip_distances{}; Id layer{}; Id viewport_index{}; + Id viewport_mask{}; Id primitive_id{}; Id fswzadd_lut_a{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 105602ccf..90c4833a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -303,6 +303,10 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } if (info.stores_layer || info.stores_viewport_index) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3de577f6..ca067f1c4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -99,6 +99,11 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { ctx.stage == Shader::Stage::Geometry ? std::optional{ctx.viewport_index} : std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value); default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c84bf211f..9631a445e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -96,6 +96,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; + case IR::Attribute::ViewportMask: + info.stores_viewport_mask = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a04f075e..a2c2948d5 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -75,6 +75,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; + bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d6cde1596..d33df8aad 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -124,6 +124,7 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_viewport_mask{}; bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; 
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0bccc640a..4d0d3ebb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -690,6 +690,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0de19ba1..72b83f99a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -346,6 +346,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -724,6 +728,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4e6d13308..4415558bb 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -169,6 +169,11 @@ public: return nv_viewport_swizzle; } + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -312,6 +317,7 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
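The test() helper above only records whether the driver reported the extension string; the underlying mechanism is the standard Vulkan device-extension query. A standalone sketch of that probe, assuming a valid VkPhysicalDevice (the function name is illustrative; the Vulkan calls themselves are the real API):

#include <cstdint>
#include <cstring>
#include <vector>
#include <vulkan/vulkan.h>

// Returns true if the device advertises VK_NV_viewport_array2, the extension
// gating support_viewport_mask in the patch above.
bool SupportsViewportArray2(VkPhysicalDevice physical_device) {
    std::uint32_t count = 0;
    vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, nullptr);
    std::vector<VkExtensionProperties> properties(count);
    vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &count, properties.data());
    for (const VkExtensionProperties& property : properties) {
        if (std::strcmp(property.extensionName, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) == 0) {
            return true; // Safe to request the extension at device creation.
        }
    }
    return false;
}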
From f3473c5143fddc0c34eb01f52523bb49d94c1bbb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:46:44 -0300 Subject: [PATCH 242/770] spirv: Bitcast non-F32 output attributes to their type before store --- .../spirv/emit_spirv_context_get_set.cpp | 41 +++++++++++++------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ca067f1c4..aaa20ab95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -52,7 +52,15 @@ Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) } } -std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { +struct OutAttr { + OutAttr(Id pointer_) : pointer{pointer_} {} + OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} + + Id pointer{}; + Id type{}; +}; + +std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; @@ -90,20 +98,23 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: - return ctx.profile.support_viewport_index_layer_non_geometry || - ctx.stage == Shader::Stage::Geometry - ? std::optional{ctx.layer} - : std::nullopt; + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.layer, ctx.U32[1]}; + } + return std::nullopt; case IR::Attribute::ViewportIndex: - return ctx.profile.support_viewport_index_layer_non_geometry || - ctx.stage == Shader::Stage::Geometry - ?
std::optional{ctx.viewport_index} - : std::nullopt; + if (ctx.profile.support_viewport_index_layer_non_geometry || + ctx.stage == Shader::Stage::Geometry) { + return OutAttr{ctx.viewport_index, ctx.U32[1]}; + } + return std::nullopt; case IR::Attribute::ViewportMask: if (!ctx.profile.support_viewport_mask) { return std::nullopt; } - return ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value); + return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), + ctx.U32[1]}; default: throw NotImplementedException("Read attribute {}", attr); } @@ -262,10 +273,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { - const std::optional<Id> output{OutputAttrPointer(ctx, attr)}; - if (output) { - ctx.OpStore(*output, value); + const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; + if (!output) { + return; } + if (Sirit::ValidId(output->type)) { + value = ctx.OpBitcast(output->type, value); + } + ctx.OpStore(output->pointer, value); } Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: [PATCH 243/770] shader: Implement PIXLD.MY_INDEX --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 3 ++ .../backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv.cpp | 3 ++ .../backend/spirv/emit_spirv.h | 1 + .../spirv/emit_spirv_context_get_set.cpp | 4 ++ .../frontend/ir/ir_emitter.cpp | 4 ++ .../frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/pixel_load.cpp | 46 +++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/shader_info.h | 1 + .../vulkan_common/vulkan_device.cpp | 3 +- 14 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 7c11d15bf..07963a760 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -137,6 +137,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/impl/output_geometry.cpp + frontend/maxwell/translate/impl/pixel_load.cpp frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2f8678b4e..0b4abeb44 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -998,6 +998,9 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_invocation_id) { invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } if (info.uses_is_helper_invocation) { is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h
b/src/shader_recompiler/backend/spirv/emit_context.h index c41cad098..9d8340333 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -149,6 +149,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; Id invocation_id{}; + Id sample_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 90c4833a8..9ec970706 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -335,6 +335,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } if (!ctx.profile.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8caf30f1b..dfddf5e58 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -70,6 +70,7 @@ void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index aaa20ab95..7555dd94c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -391,6 +391,10 @@ Id EmitInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); } +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b821d9f47..141efd86c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ U32 IREmitter::InvocationId() { return Inst<U32>(Opcode::InvocationId); } +U32 IREmitter::SampleId() { + return Inst<U32>(Opcode::SampleId); +} + U1 IREmitter::IsHelperInvocation() { return Inst<U1>(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7f8f1ad42..81833d928 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,7 @@ public: [[nodiscard]] U32 LocalInvocationIdZ(); [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a86542cd8..d5e443673 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetOFlag, Void,
U1, OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a45d1e4be..a4f99bbbe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,10 +181,6 @@ void TranslatorVisitor::PEXIT(u64) { ThrowNotImplemented(Opcode::PEXIT); } -void TranslatorVisitor::PIXLD(u64) { - ThrowNotImplemented(Opcode::PIXLD); -} - void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9631a445e..5d1310466 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -415,6 +415,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::InvocationId: info.uses_invocation_id = true; break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d33df8aad..686f5c719 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -102,6 +102,7 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_invocation_id{}; + bool uses_sample_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; std::array<bool, 30> uses_patches{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 72b83f99a..038231298 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -218,7 +218,7 @@
Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, + .sampleRateShading = true, .dualSrcBlend = false, .logicOp = false, .multiDrawIndirect = false, @@ -677,6 +677,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), From 80940b17069f6baa733a9b572445b27bc7509137 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 18:47:26 -0300 Subject: [PATCH 244/770] shader: Implement SampleMask --- src/shader_recompiler/backend/spirv/emit_context.cpp | 5 ++++- src/shader_recompiler/backend/spirv/emit_context.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/exit_program.cpp | 2 +- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/shader_info.h | 1 + 11 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0b4abeb44..b9e6d5655 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1179,7 +1179,10 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (info.stores_frag_depth) { frag_depth = DefineOutput(*this, F32[1], std::nullopt); Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); - Name(frag_depth, "frag_depth"); + } + if (info.stores_sample_mask) { + sample_mask = DefineOutput(*this, U32[1], std::nullopt); + Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); } break; default: diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9d8340333..528dc33fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -215,6 +215,7 @@ public: std::array<Id, 30> patches{}; std::array<Id, 8> frag_color{}; + Id sample_mask{}; Id frag_depth{}; std::vector<Id> interfaces; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index dfddf5e58..9f658a4bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -58,6 +58,7 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx);
void EmitGetSFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 7555dd94c..e5e4c352b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -343,6 +343,10 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { ctx.OpStore(pointer, value); } +void EmitSetSampleMask(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.sample_mask, value); +} + void EmitSetFragDepth(EmitContext& ctx, Id value) { ctx.OpStore(ctx.frag_depth, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 141efd86c..ef3b00bc2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -343,6 +343,10 @@ void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } +void IREmitter::SetSampleMask(const U32& value) { + Inst(Opcode::SetSampleMask, value); +} + void IREmitter::SetFragDepth(const F32& value) { Inst(Opcode::SetFragDepth, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 81833d928..1a585df15 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -88,6 +88,7 @@ public: void SetPatch(Patch patch, const F32& value); void SetFragColor(u32 index, u32 component, const F32& value); + void SetSampleMask(const U32& value); void SetFragDepth(const F32& value); [[nodiscard]] U32 WorkgroupIdX(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b2d7573d9..b53fe2e2a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -75,6 +75,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SetAttributeIndexed: case Opcode::SetPatch: case Opcode::SetFragColor: + case Opcode::SetSampleMask: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d5e443673..0748efa8d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -51,6 +51,7 @@ OPCODE(SetAttributeIndexed, Void, U32, OPCODE(GetPatch, F32, Patch, ) OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetSampleMask, Void, U32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index 58a53c0ec..c2443c886 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -22,7 +22,7 @@ void ExitFragment(TranslatorVisitor& v) { } } if (sph.ps.omap.sample_mask != 0) { - throw NotImplementedException("Sample mask"); + v.ir.SetSampleMask(v.X(src_reg)); } if (sph.ps.omap.depth != 0) { v.ir.SetFragDepth(v.F(src_reg + 1)); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 
b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5d1310466..60b7d3a36 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -403,6 +403,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetFragColor: info.stores_frag_color[inst.Arg(0).U32()] = true; break; + case IR::Opcode::SetSampleMask: + info.stores_sample_mask = true; + break; case IR::Opcode::SetFragDepth: info.stores_frag_depth = true; break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 686f5c719..0a8931930 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -118,6 +118,7 @@ struct Info { bool loads_indexed_attributes{}; std::array<bool, 8> stores_frag_color{}; + bool stores_sample_mask{}; bool stores_frag_depth{}; std::array<bool, 32> stores_generics{}; bool stores_position{}; From be431f5ed080955cce358e9750347229b2bc9a04 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Apr 2021 00:48:35 -0400 Subject: [PATCH 245/770] shader: Implement BFE and BFI CC Fix two bugs in BFI. --- .../backend/spirv/emit_spirv_select.cpp | 3 +-- .../maxwell/translate/impl/bitfield_extract.cpp | 11 +++++++---- .../maxwell/translate/impl/bitfield_insert.cpp | 17 +++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 8b0562da5..0b45db45e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -10,8 +10,7 @@ Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { return ctx.OpSelect(ctx.U1, cond, true_value, false_value); } -Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond, - [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) { +Id EmitSelectU8(EmitContext&, Id, Id, Id) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 0738ae7a6..9d5a87e52 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -18,10 +18,6 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<48, 1, u64> is_signed; } const bfe{insn}; - if (bfe.cc != 0) { - throw NotImplementedException("BFE CC"); - } - const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; @@ -53,6 +49,13 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { result = IR::U32{v.ir.Select(zero_count, zero, result)}; v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index fb7f821e6..1e1ec2119 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -16,18 +16,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba BitField<47, 1, u64> cc; } const bfi{insn}; - if (bfi.cc != 0) { - throw NotImplementedException("BFI CC"); - } - - const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; // Edge case conditions - const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; - const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; @@ -36,9 +32,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; result = IR::U32{v.ir.Select(exceed_offset, base, result)}; - result = IR::U32{v.ir.Select(zero_offset, base, result)}; v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace From dbbd4b549682dd4302e6258cced184a0ec78e99a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 02:59:54 -0300 Subject: [PATCH 246/770] spirv: Use ConstOffset instead of Offset when possible --- .../backend/spirv/emit_context.h | 17 +++++ .../backend/spirv/emit_spirv.h | 8 +-- .../backend/spirv/emit_spirv_image.cpp | 63 ++++++++++++++----- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 528dc33fe..9db2b0c94 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -97,6 +97,23 @@ public: [[nodiscard]] Id Def(const IR::Value& value); + Id Const(u32 value) { return Constant(U32[1], value); } + Id Const(u32 element_1, u32 element_2) { return ConstantComposite(U32[2], Const(element_1), Const(element_2)); } + Id Const(u32 element_1, u32 element_2, u32 element_3) { return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); } + Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), Const(element_4)); } + const Profile& profile; Stage stage{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 9f658a4bd..cf8d74f4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -490,13 +490,13 @@ Id EmitBoundImageGradient(EmitContext&); Id EmitBoundImageRead(EmitContext&); Id EmitBoundImageWrite(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, Id offset); + Id bias_lc, const IR::Value& offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, Id offset); + Id lod_lc, const IR::Value&
offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, Id offset); + Id coords, Id dref, Id bias_lc, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, Id offset); + Id coords, Id dref, Id lod_lc, const IR::Value& offset); Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index c8d1d25b1..021933a8c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -12,7 +12,7 @@ namespace { class ImageOperands { public: explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, - Id lod, Id offset) { + Id lod, const IR::Value& offset) { if (has_bias) { const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; Add(spv::ImageOperandsMask::Bias, bias); @@ -21,9 +21,7 @@ public: const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; Add(spv::ImageOperandsMask::Lod, lod_value); } - if (Sirit::ValidId(offset)) { - Add(spv::ImageOperandsMask::Offset, offset); - } + AddOffset(ctx, offset); if (has_lod_clamp) { const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; Add(spv::ImageOperandsMask::MinLod, lod_clamp); @@ -96,6 +94,46 @@ public: } + std::span<const Id> Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + void AddOffset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return; + } + if (offset.IsImmediate()) { + Add(spv::ImageOperandsMask::ConstOffset, ctx.Constant(ctx.U32[1], offset.U32())); + return; + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32())); + return; + case IR::Opcode::CompositeConstructU32x3: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32())); + return; + case IR::Opcode::CompositeConstructU32x4: + Add(spv::ImageOperandsMask::ConstOffset, + ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32(), + inst->Arg(3).U32())); + return; + default: + break; + } + } + Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); + } + void Add(spv::ImageOperandsMask new_mask, Id value) { mask = static_cast<spv::ImageOperandsMask>(static_cast<u32>(mask) | static_cast<u32>(new_mask)); @@ -109,15 +147,6 @@ public: operands.push_back(value_2); } - std::span<const Id> Span() const noexcept { - return std::span{operands.data(), operands.size()}; - } - - spv::ImageOperandsMask Mask() const noexcept { - return mask; - } - -private: boost::container::static_vector<Id, 4> operands; spv::ImageOperandsMask mask{}; }; @@ -279,7 +308,7 @@ Id EmitBoundImageWrite(EmitContext&) { } Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, Id offset) { + Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const
ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); @@ -289,7 +318,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, Id offset) { + Id lod_lc, const IR::Value& offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, @@ -298,7 +327,7 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, Id offset) { + Id coords, Id dref, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); @@ -308,7 +337,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, Id offset) { + Id coords, Id dref, Id lod_lc, const IR::Value& offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, From 7cfa403683f46cfca71ef2caf4ff53355eac47b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 03:07:31 -0300 Subject: [PATCH 247/770] spirv: Use explicit lods outside of fragment shaders --- .../backend/spirv/emit_spirv_image.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 021933a8c..fea3bc112 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -310,11 +310,22 @@ Id EmitBoundImageWrite(EmitContext&) { Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, const IR::Value& offset) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, - offset); - return Emit(&EmitContext::OpImageSparseSampleImplicitLod, - &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + if (ctx.stage == Stage::Fragment) { + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, + bias_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, index), coords, operands.Mask(), operands.Span()); + } else { + // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as + // if the lod was explicitly zero.
This may change on Turing with implicit compute + // derivatives + const Id lod{ctx.Const(0)}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, index), coords, operands.Mask(), operands.Span()); + } } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, From c9e4609d87570fc407014cd4b34a60611ad63fac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 03:19:54 -0300 Subject: [PATCH 248/770] spirv: Fix implicit lod type --- src/shader_recompiler/backend/spirv/emit_context.h | 4 ++++ src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9db2b0c94..7567fdcac 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -101,6 +101,10 @@ public: return Constant(U32[1], value); } + Id Const(f32 value) { + return Constant(F32[1], value); + } + Id Const(u32 element_1, u32 element_2) { return ConstantComposite(U32[2], Const(element_1), Const(element_2)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fea3bc112..7a4388e7e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -320,7 +320,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as // if the lod was explicitly zero. 
This may change on Turing with implicit compute // derivatives - const Id lod{ctx.Const(0)}; + const Id lod{ctx.Const(0.0f)}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, From 0a0818c0259b4f90f1f7bb37fcffbc1f194ca4d0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 03:21:03 -0300 Subject: [PATCH 249/770] shader: Fix memory barriers --- .../backend/spirv/emit_spirv.h | 5 ++-- .../backend/spirv/emit_spirv_barriers.cpp | 14 ++++----- .../frontend/ir/ir_emitter.cpp | 20 ++++--------- .../frontend/ir/ir_emitter.h | 5 ++-- .../frontend/ir/microinstruction.cpp | 5 ++-- src/shader_recompiler/frontend/ir/modifiers.h | 8 ----- src/shader_recompiler/frontend/ir/opcodes.inc | 5 ++-- .../translate/impl/barrier_operations.cpp | 30 +++++++------------ 8 files changed, 30 insertions(+), 62 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index cf8d74f4e..d43c72f6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -29,9 +29,8 @@ void EmitReturn(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitBarrier(EmitContext& ctx); -void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx); -void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); -void EmitMemoryBarrierSystemLevel(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 74f523d0f..366dc6a0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { -void EmitMemoryBarrierImpl(EmitContext& ctx, spv::Scope scope) { +void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { const auto semantics{ spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | @@ -27,16 +27,12 @@ void EmitBarrier(EmitContext& ctx) { ctx.Constant(ctx.U32[1], static_cast<u32>(memory_semantics))); } -void EmitMemoryBarrierWorkgroupLevel(EmitContext& ctx) { - EmitMemoryBarrierImpl(ctx, spv::Scope::Workgroup); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Workgroup); } -void EmitMemoryBarrierDeviceLevel(EmitContext& ctx) { - EmitMemoryBarrierImpl(ctx, spv::Scope::Device); -} - -void EmitMemoryBarrierSystemLevel(EmitContext& ctx) { - EmitMemoryBarrierImpl(ctx, spv::Scope::CrossDevice); +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + MemoryBarrier(ctx, spv::Scope::Device); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ef3b00bc2..aebe7200f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -86,20 +86,12 @@ void IREmitter::Barrier() { Inst(Opcode::Barrier); } -void
IREmitter::MemoryBarrier(MemoryScope scope) { - switch (scope) { - case MemoryScope::Workgroup: - Inst(Opcode::MemoryBarrierWorkgroupLevel); - break; - case MemoryScope::Device: - Inst(Opcode::MemoryBarrierDeviceLevel); - break; - case MemoryScope::System: - Inst(Opcode::MemoryBarrierSystemLevel); - break; - default: - throw InvalidArgument("Invalid memory scope {}", scope); - } +void IREmitter::WorkgroupMemoryBarrier() { + Inst(Opcode::WorkgroupMemoryBarrier); +} + +void IREmitter::DeviceMemoryBarrier() { + Inst(Opcode::DeviceMemoryBarrier); } void IREmitter::Return() { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1a585df15..b9d051b43 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -144,8 +144,9 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); - [[nodiscard]] void Barrier(); - [[nodiscard]] void MemoryBarrier(MemoryScope scope); + void Barrier(); + void WorkgroupMemoryBarrier(); + void DeviceMemoryBarrier(); template <typename Dest, typename Source> [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b53fe2e2a..efa426808 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -64,9 +64,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: - case Opcode::MemoryBarrierWorkgroupLevel: - case Opcode::MemoryBarrierDeviceLevel: - case Opcode::MemoryBarrierSystemLevel: + case Opcode::WorkgroupMemoryBarrier: + case Opcode::DeviceMemoryBarrier: case Opcode::Prologue: case Opcode::Epilogue: case Opcode::EmitVertex: diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 447e9703c..5d7efa14c 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,14 +25,6 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { - DontCare, - Warp, - Workgroup, - Device, - System, -}; - struct FpControl { bool no_contraction{false}; FpRounding rounding{FpRounding::DontCare}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0748efa8d..1cfc2a943 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -18,9 +18,8 @@ OPCODE(DemoteToHelperInvocation, Void, Labe // Barriers OPCODE(Barrier, Void, ) -OPCODE(MemoryBarrierWorkgroupLevel, Void, ) -OPCODE(MemoryBarrierDeviceLevel, Void, ) -OPCODE(MemoryBarrierSystemLevel, Void, ) +OPCODE(WorkgroupMemoryBarrier, Void, ) +OPCODE(DeviceMemoryBarrier, Void, ) // Special operations OPCODE(Prologue, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 2a2a294df..86e433e41 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -12,34 +12,24 @@ namespace Shader::Maxwell { namespace { // Seems to be in CUDA terminology.
enum class LocalScope : u64 { - CTG = 0, - GL = 1, - SYS = 2, - VC = 3, + CTA, + GL, + SYS, + VC, }; -IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { - switch (scope) { - case LocalScope::CTG: - return IR::MemoryScope::Workgroup; - case LocalScope::GL: - return IR::MemoryScope::Device; - case LocalScope::SYS: - return IR::MemoryScope::System; - default: - throw NotImplementedException("Unimplemented Local Scope {}", scope); - } -} - } // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; - } membar{inst}; + } const membar{inst}; - ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } } void TranslatorVisitor::DEPBAR() { From 50f8007172ce143a632270510f96093c82018952 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 16:40:35 -0300 Subject: [PATCH 250/770] shader: Fix Phi node types --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ---- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index efa426808..7555ac00a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -275,10 +275,6 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } - if (Flags<IR::Type>() == IR::Type::Void) { - // Set the type of the phi node - SetFlags(value.Type()); - } phi_args.emplace_back(predecessor, value); } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 346fcc377..ddd679e39 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -176,6 +176,8 @@ public: } else if (!sealed_blocks.contains(block)) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; } else if (const std::span imm_preds{block->ImmediatePredecessors()}; @@ -187,6 +189,8 @@ } else { // Break potential cycles with operandless phi IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); + WriteVariable(variable, block, IR::Value{phi}); stack.back().phi = phi; From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: [PATCH 251/770] shader: Implement SR_Y_DIRECTION --- src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/move_special_register.cpp | 2 ++ src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 10 files changed, 22 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h
b/src/shader_recompiler/backend/spirv/emit_spirv.h index d43c72f6e..7949d08d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -72,6 +72,7 @@ Id EmitLocalInvocationId(EmitContext& ctx); Id EmitInvocationId(EmitContext& ctx); Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e5e4c352b..1030404c0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -403,6 +403,13 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } +Id EmitYDirection(EmitContext& ctx) { + if (ctx.profile.y_negate) { + return ctx.Constant(ctx.F32[1], -1.0f); + } + return ctx.Constant(ctx.F32[1], 1.0f); +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index aebe7200f..c3e8d0681 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -379,6 +379,10 @@ U1 IREmitter::IsHelperInvocation() { return Inst<U1>(Opcode::IsHelperInvocation); } +F32 IREmitter::YDirection() { + return Inst<F32>(Opcode::YDirection); +} + U32 IREmitter::LaneId() { return Inst<U32>(Opcode::LaneId); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index b9d051b43..7e67f5e30 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -102,6 +102,7 @@ public: [[nodiscard]] U32 InvocationId(); [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] F32 YDirection(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1cfc2a943..269de8ca5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -65,6 +65,7 @@ OPCODE(LocalInvocationId, U32x3, OPCODE(InvocationId, U32, ) OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) +OPCODE(YDirection, F32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 660b84c20..b0baff74b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -150,6 +150,8 @@ enum class SpecialRegister : u64 { return ir.SubgroupGtMask(); case SpecialRegister::SR_GEMASK: return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast<U32>(ir.YDirection()); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a2c2948d5..08242184f 100644 ---
a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { std::optional<CompareFunction> alpha_test_func; float alpha_test_reference{}; + bool y_negate{}; + std::vector<TransformFeedbackVarying> xfb_varyings; }; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 6a3baf837..24834e0f7 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -82,6 +82,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 5568c4f72..31de6b2c8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -202,6 +202,7 @@ struct FixedPipelineState { BitField<3, 1, u32> early_z; BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; + BitField<10, 1, u32> y_negate; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4d0d3ebb7..e9b93336b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1116,6 +1116,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, break; } profile.force_early_z = key.state.early_z != 0; + profile.y_negate = key.state.y_negate != 0; return profile; } From 04c459fc8d99b41fa8a03c49523599e9bf797f9d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 11:56:45 +0200 Subject: [PATCH 252/770] shader: Implement fine derivatives constant propagation --- .../backend/spirv/emit_spirv.cpp | 3 + .../backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_warp.cpp | 8 +++ .../frontend/ir/ir_emitter.cpp | 8 +++ .../frontend/ir/ir_emitter.h | 4 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../ir_opt/collect_shader_info_pass.cpp | 4 ++ .../ir_opt/constant_propagation_pass.cpp | 67 +++++++++++++++++++ src/shader_recompiler/shader_info.h | 1 + 9 files changed, 101 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9ec970706..c4d5874ca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -341,6 +341,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (!ctx.profile.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } + if (info.uses_derivates) { + ctx.AddCapability(spv::Capability::DerivativeControl); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7949d08d0..dec4f434a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -529,4 +529,8 @@ Id
EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id Id segmentation_mask); Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); + +Id EmitDPdyFine(EmitContext& ctx, Id op_a); + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 1c23ccc08..d53412204 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -183,4 +183,12 @@ Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { return ctx.OpFAdd(ctx.F32[1], result_a, result_b); } +Id EmitDPdxFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxFine(ctx.F32[1], op_a); +} + +Id EmitDPdyFine(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyFine(ctx.F32[1], op_a); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index c3e8d0681..845a57b1e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1925,4 +1925,12 @@ F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpCon return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); } +F32 IREmitter::DPdxFine(const F32& a) { + return Inst<F32>(Opcode::DPdxFine, a); +} + +F32 IREmitter::DPdyFine(const F32& a) { + return Inst<F32>(Opcode::DPdyFine, a); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7e67f5e30..c7101d668 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -353,6 +353,10 @@ public: [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control = {}); + [[nodiscard]] F32 DPdxFine(const F32& a); + + [[nodiscard]] F32 DPdyFine(const F32& a); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 269de8ca5..e4cb8964a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -511,3 +511,5 @@ OPCODE(ShuffleUp, U32, U32, OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) +OPCODE(DPdxFine, F32, F32, ) +OPCODE(DPdyFine, F32, F32, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60b7d3a36..e5688667b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -530,6 +530,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::DPdxFine: + case IR::Opcode::DPdyFine: + info.uses_derivates = true; + break; case IR::Opcode::LoadStorageU8: case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index ee73b5b60..983fb20ab 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -412,6 +412,71 @@ void
FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser inst.ReplaceUsesWith(*result); } +IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { + if (value.IsImmediate()) { + return value; + } + IR::Inst* const inst{value.InstRecursive()}; + if (inst->GetOpcode() == expected_cast) { + return inst->Arg(0).Resolve(); + } + return value; +} + +void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { + const IR::Value swizzle{inst.Arg(2)}; + if (!swizzle.IsImmediate()) { + return; + } + + const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; + const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; + + if (value_1.IsImmediate()) { + return; + } + + const u32 swizzle_value{swizzle.U32()}; + if (swizzle_value != 0x99 && swizzle_value != 0xA5) { + return; + } + + IR::Inst* const inst2{value_1.InstRecursive()}; + if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { + return; + } + const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; + if (value_2 != value_3) { + return; + } + + const IR::Value index{inst2->Arg(1)}; + const IR::Value clamp{inst2->Arg(2)}; + const IR::Value segmentation_mask{inst2->Arg(3)}; + + if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { + return; + } + + if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { + return; + } + + if (swizzle_value == 0x99) { + // DPdxFine + if (index.U32() == 1) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); + } + } else if (swizzle_value == 0xA5) { + // DPdyFine + if (index.U32() == 2) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); + } + } +} + void ConstantPropagation(IR::Block& block, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: @@ -532,6 +597,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x4: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, IR::Opcode::CompositeInsertF16x4); + case IR::Opcode::FSwizzleAdd: + return FoldFSwizzleAdd(block, inst); default: break; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0a8931930..3f22958e8 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -147,6 +147,7 @@ struct Info { bool uses_subgroup_vote{}; bool uses_subgroup_mask{}; bool uses_fswzadd{}; + bool uses_derivates{}; bool uses_typeless_image_reads{}; bool uses_typeless_image_writes{}; bool uses_shared_increment{}; From 080857b60e78836901cf6e9601f48613812fcd04 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 12:51:43 +0200 Subject: [PATCH 253/770] shader: Add coarse derivatives --- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++++ src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 ++ .../ir_opt/constant_propagation_pass.cpp | 8 -------- 7 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index dec4f434a..67d06faa0 100644 
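A note on the constants matched by FoldFSwizzleAdd above, reconstructed from the emitter code rather than stated in the commit; the quad lane layout and the 2-bit direction encoding below are assumptions for illustration:

// Assume pixel-quad lanes 0=(0,0), 1=(1,0), 2=(0,1), 3=(1,1); SHFL.BFLY
// with index 1 swaps horizontal neighbours, index 2 vertical neighbours,
// and each 2-bit field of the swizzle picks a per-lane subtract direction.
// swizzle 0x99 = 0b10'01'10'01 with butterfly index 1 (horizontal swap):
//   lanes 0,1 both compute v1 - v0; lanes 2,3 both compute v3 - v2
//   -> per-quad dFdxFine
// swizzle 0xA5 = 0b10'10'01'01 with butterfly index 2 (vertical swap):
//   lanes 0,2 both compute v2 - v0; lanes 1,3 both compute v3 - v1
//   -> per-quad dFdyFine
// clamp == 3 and segmentation_mask == 28 (0b11100) confine the butterfly
// to each 4-lane quad, which is why the whole expression can be folded to
// the new DPdxFine/DPdyFine opcodes.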
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -533,4 +533,8 @@ Id EmitDPdxFine(EmitContext& ctx, Id op_a); Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); + +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index d53412204..a255f9ba7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -191,4 +191,12 @@ Id EmitDPdyFine(EmitContext& ctx, Id op_a) { return ctx.OpDPdyFine(ctx.F32[1], op_a); } +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdxCoarse(ctx.F32[1], op_a); +} + +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { + return ctx.OpDPdyCoarse(ctx.F32[1], op_a); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 845a57b1e..b3c9fe72a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1933,4 +1933,12 @@ F32 IREmitter::DPdyFine(const F32& a) { return Inst(Opcode::DPdyFine, a); } +F32 IREmitter::DPdxCoarse(const F32& a) { + return Inst(Opcode::DPdxCoarse, a); +} + +F32 IREmitter::DPdyCoarse(const F32& a) { + return Inst(Opcode::DPdyCoarse, a); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index c7101d668..4441c495d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -357,6 +357,10 @@ public: [[nodiscard]] F32 DPdyFine(const F32& a); + [[nodiscard]] F32 DPdxCoarse(const F32& a); + + [[nodiscard]] F32 DPdyCoarse(const F32& a); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e4cb8964a..b6869d4e4 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -513,3 +513,5 @@ OPCODE(ShuffleButterfly, U32, U32, OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) OPCODE(DPdxFine, F32, F32, ) OPCODE(DPdyFine, F32, F32, ) +OPCODE(DPdxCoarse, F32, F32, ) +OPCODE(DPdyCoarse, F32, F32, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e5688667b..7473e0bc2 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -532,6 +532,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::DPdxFine: case IR::Opcode::DPdyFine: + case IR::Opcode::DPdxCoarse: + case IR::Opcode::DPdyCoarse: info.uses_derivates = true; break; case IR::Opcode::LoadStorageU8: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 983fb20ab..7e86f64a8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -428,19 +428,15 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { if (!swizzle.IsImmediate()) { return; } - const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; const IR::Value 
value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; - if (value_1.IsImmediate()) { return; } - const u32 swizzle_value{swizzle.U32()}; if (swizzle_value != 0x99 && swizzle_value != 0xA5) { return; } - IR::Inst* const inst2{value_1.InstRecursive()}; if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { return; @@ -449,19 +445,15 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { if (value_2 != value_3) { return; } - const IR::Value index{inst2->Arg(1)}; const IR::Value clamp{inst2->Arg(2)}; const IR::Value segmentation_mask{inst2->Arg(3)}; - if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { return; } - if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { return; } - if (swizzle_value == 0x99) { // DPdxFine if (index.U32() == 1) { From f69d0b91ffad7d9ab827f55a9297b8f6da815cc9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 09:07:48 +0200 Subject: [PATCH 254/770] shader: Address feedback --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 5 +---- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 2 +- src/shader_recompiler/shader_info.h | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c4d5874ca..5d6fdeb65 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -341,7 +341,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (!ctx.profile.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } - if (info.uses_derivates) { + if (info.uses_derivatives) { ctx.AddCapability(spv::Capability::DerivativeControl); } // TODO: Track this usage diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1030404c0..ed57e44a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -404,10 +404,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - if (ctx.profile.y_negate) { - return ctx.Constant(ctx.F32[1], -1.0f); - } - return ctx.Constant(ctx.F32[1], 1.0f); + return ctx.Constant(ctx.F32[1], ctx.profile.y_negate ? 
-1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 7473e0bc2..0500a5141 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -534,7 +534,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::DPdyFine: case IR::Opcode::DPdxCoarse: case IR::Opcode::DPdyCoarse: - info.uses_derivates = true; + info.uses_derivatives = true; break; case IR::Opcode::LoadStorageU8: case IR::Opcode::LoadStorageS8: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3f22958e8..f808adeba 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -147,7 +147,7 @@ struct Info { bool uses_subgroup_vote{}; bool uses_subgroup_mask{}; bool uses_fswzadd{}; - bool uses_derivates{}; + bool uses_derivatives{}; bool uses_typeless_image_reads{}; bool uses_typeless_image_writes{}; bool uses_shared_increment{}; From 21a878237bcc6f19f41a4bce156714fd76be9d58 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:41:37 +0200 Subject: [PATCH 255/770] shader: Implement IADD3.CC/.X --- .../impl/integer_add_three_input.cpp | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index c2dbd7998..e88c0ffb6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -58,13 +58,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - if (iadd3.x != 0) { - throw NotImplementedException("IADD3 X"); - } - if (iadd3.cc != 0) { - throw NotImplementedException("IADD3 CC"); - } - IR::U32 op_a{v.X(iadd3.src_a)}; op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); @@ -81,10 +74,32 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + IR::U1 of_1; + if (iadd3.cc != 0) { + of_1 = v.ir.GetOverflowFromOp(lhs); + } + if (iadd3.x != 0) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs = v.ir.IAdd(lhs, carry); + } + if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { + IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); + } lhs = IntegerShift(v.ir, lhs, iadd3.shift); const IR::U32 result{v.ir.IAdd(lhs, op_c)}; v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? 
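// For context on the .X bit handled here (a sketch of the guest idiom,
// with illustrative register names that are not taken from this patch):
// .CC on a low-word add exports the carry, and .X on the following
// high-word add consumes it, which is how adds wider than 32 bits are
// split across 32-bit registers:
//   IADD3.CC R0, R2, R4, RZ   ; R0 = lo(R2 + R4), CC.C = carry out
//   IADD3.X  R1, R3, R5, RZ   ; R1 = R3 + R5 + CC.C
// The translation above models the carry-in with Select(GetCFlag(), 1, 0)
// and, for the left-shift form, also folds any nonzero high 16 bits of
// the intermediate sum into the overflow flag. Producing correct flags
// when one IADD3 both consumes (.X) and produces (.CC) a carry is the
// open question in the TODO, hence the throw below.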
+ if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } } } // Anonymous namespace From 881b33da3ba16fc105c6ccd20f6fbc9c4552ead9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:42:40 +0200 Subject: [PATCH 256/770] shader: Implement F2F (Imm) --- ...oating_point_conversion_floating_point.cpp | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index ce2cf470d..61484df57 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -179,7 +179,33 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) { } void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { - throw NotImplementedException("Instruction"); -} + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + BitField<20, 20, u64> imm; + + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast(f2f.imm & 0x00ffff)}; + IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} // namespace Shader::Maxwell } // namespace Shader::Maxwell From 29990289767c41c162473c9775ad3ba08e7ee9ea Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 10:08:22 +0200 Subject: [PATCH 257/770] shader: Address feedback --- .../floating_point_conversion_floating_point.cpp | 15 +++++++++------ .../translate/impl/integer_add_three_input.cpp | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 61484df57..02ab023c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -184,16 +184,19 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { BitField<49, 1, u64> abs; BitField<10, 2, FloatFormat> src_size; BitField<41, 1, u64> selector; - BitField<20, 20, u64> imm; - + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; } const f2f{insn}; IR::F16F32F64 src_a; switch (f2f.src_size) { case FloatFormat::F16: { - const u32 imm{static_cast(f2f.imm & 0x00ffff)}; - IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; - src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + const u32 imm{static_cast(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 
0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } break; } case FloatFormat::F32: @@ -206,6 +209,6 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); } F2F(*this, insn, src_a, f2f.abs != 0); -} // namespace Shader::Maxwell +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index e88c0ffb6..15da90365 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -83,7 +83,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { lhs = v.ir.IAdd(lhs, carry); } if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); } lhs = IntegerShift(v.ir, lhs, iadd3.shift); From 5b8afed87115c82cb48913fd47dfbfa347e4faa5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 18 Apr 2021 20:47:31 -0400 Subject: [PATCH 258/770] spirv: Replace Constant/ConstantComposite with Const helper --- .../backend/spirv/emit_context.cpp | 69 +++++++++---------- .../backend/spirv/emit_context.h | 2 +- .../backend/spirv/emit_spirv_atomic.cpp | 16 ++--- .../backend/spirv/emit_spirv_barriers.cpp | 9 ++- .../spirv/emit_spirv_context_get_set.cpp | 38 +++++----- .../spirv/emit_spirv_floating_point.cpp | 6 +- .../backend/spirv/emit_spirv_image.cpp | 21 +++--- .../backend/spirv/emit_spirv_integer.cpp | 2 +- .../backend/spirv/emit_spirv_memory.cpp | 4 +- .../spirv/emit_spirv_shared_memory.cpp | 30 ++++---- .../backend/spirv/emit_spirv_special.cpp | 8 +-- .../backend/spirv/emit_spirv_warp.cpp | 6 +- 12 files changed, 100 insertions(+), 111 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b9e6d5655..214ef9c25 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -131,13 +131,13 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, case Stage::TessellationControl: case Stage::TessellationEval: if (per_invocation) { - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u)); + type = ctx.TypeArray(type, ctx.Const(32u)); } break; case Stage::Geometry: if (per_invocation) { const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + type = ctx.TypeArray(type, ctx.Const(num_vertices)); } break; default: @@ -149,7 +149,7 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, Id DefineOutput(EmitContext& ctx, Id type, std::optional invocations, std::optional builtin = std::nullopt) { if (invocations && ctx.stage == Stage::TessellationControl) { - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations)); + type = ctx.TypeArray(type, ctx.Const(*invocations)); } return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } @@ -224,7 +224,7 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, u32 
binding, Id type, char type_char, u32 element_size) { - const Id array_type{ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 65536U / element_size))}; + const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{ctx.TypeStruct(array_type)}; @@ -328,7 +328,7 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; const Id cas_func{CasFunction(ctx, operation, value_type)}; const Id zero{ctx.u32_zero_value}; - const Id scope_id{ctx.Constant(ctx.U32[1], static_cast(scope))}; + const Id scope_id{ctx.Const(static_cast(scope))}; const Id loop_header{ctx.OpLabel()}; const Id continue_block{ctx.OpLabel()}; @@ -428,11 +428,11 @@ Id EmitContext::Def(const IR::Value& value) { case IR::Type::U1: return value.U1() ? true_value : false_value; case IR::Type::U32: - return Constant(U32[1], value.U32()); + return Const(value.U32()); case IR::Type::U64: return Constant(U64, value.U64()); case IR::Type::F32: - return Constant(F32[1], value.F32()); + return Const(value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); case IR::Type::Label: @@ -486,8 +486,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { void EmitContext::DefineCommonConstants() { true_value = ConstantTrue(U1); false_value = ConstantFalse(U1); - u32_zero_value = Constant(U32[1], 0U); - f32_zero_value = Constant(F32[1], 0.0f); + u32_zero_value = Const(0U); + f32_zero_value = Const(0.0f); } void EmitContext::DefineInterfaces(const IR::Program& program) { @@ -500,7 +500,7 @@ void EmitContext::DefineLocalMemory(const IR::Program& program) { return; } const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; - const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id type{TypeArray(U32[1], Const(num_elements))}; const Id pointer{TypePointer(spv::StorageClass::Private, type)}; local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); if (profile.supported_spirv >= 0x00010400) { @@ -514,7 +514,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } const auto make{[&](Id element_type, u32 element_size) { const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; - const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; + const Id array_type{TypeArray(element_type, Const(num_elements))}; Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{TypeStruct(array_type)}; @@ -549,7 +549,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; - const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id type{TypeArray(U32[1], Const(num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); @@ -569,10 +569,10 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { OpBranch(loop_header); AddLabel(loop_header); - const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; - const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; - const Id count{Constant(U32[1], size)}; + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, 
Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); OpBranch(continue_block); @@ -580,9 +580,8 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; const Id old_value{OpLoad(U32[1], word_pointer)}; const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), - u32_zero_value, u32_zero_value, new_value, - old_value)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; const Id success{OpIEqual(U1, atomic_res, old_value)}; OpBranchConditional(success, merge_block, loop_header); @@ -623,9 +622,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; AddLabel(); - const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; if (info.loads_position) { @@ -643,7 +642,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); OpSwitch(compare_index, default_label, literals, labels); AddLabel(default_label); - OpReturnValue(Constant(F32[1], 0.0f)); + OpReturnValue(Const(0.0f)); size_t label_index{0}; if (info.loads_position) { AddLabel(labels[label_index]); @@ -661,7 +660,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(labels[label_index]); const auto type{AttrTypes(*this, static_cast(i))}; if (!type) { - OpReturnValue(Constant(F32[1], 0.0f)); + OpReturnValue(Const(0.0f)); ++label_index; continue; } @@ -688,9 +687,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id offset{OpFunctionParameter(U32[1])}; const Id store_value{OpFunctionParameter(F32[1])}; AddLabel(); - const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; if (info.stores_position) { @@ -744,7 +743,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturn(); ++label_index; AddLabel(labels[label_index]); - const Id fixed_index{OpIAdd(U32[1], masked_index, Constant(U32[1], 4))}; + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; OpStore(pointer2, store_value); OpReturn(); @@ -1018,9 +1017,9 @@ void 
EmitContext::DefineInputs(const Info& info) { DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); } if (info.uses_fswzadd) { - const Id f32_one{Constant(F32[1], 1.0f)}; - const Id f32_minus_one{Constant(F32[1], -1.0f)}; - const Id f32_zero{Constant(F32[1], 0.0f)}; + const Id f32_one{Const(1.0f)}; + const Id f32_minus_one{Const(-1.0f)}; + const Id f32_zero{Const(0.0f)}; fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); @@ -1118,7 +1117,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } - const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; + const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores_layer && @@ -1136,7 +1135,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } if (info.stores_viewport_mask && profile.support_viewport_mask) { - viewport_mask = DefineOutput(*this, TypeArray(U32[1], Constant(U32[1], 1u)), std::nullopt); + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { @@ -1146,13 +1145,13 @@ void EmitContext::DefineOutputs(const IR::Program& program) { switch (stage) { case Stage::TessellationControl: if (info.stores_tess_level_outer) { - const Id type{TypeArray(F32[1], Constant(U32[1], 4))}; + const Id type{TypeArray(F32[1], Const(4U))}; output_tess_level_outer = DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); Decorate(output_tess_level_outer, spv::Decoration::Patch); } if (info.stores_tess_level_inner) { - const Id type{TypeArray(F32[1], Constant(U32[1], 2))}; + const Id type{TypeArray(F32[1], Const(2U))}; output_tess_level_inner = DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); Decorate(output_tess_level_inner, spv::Decoration::Patch); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 7567fdcac..ef8507367 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -114,7 +114,7 @@ public: } Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { - return ConstantComposite(U32[2], Const(element_1), Const(element_2), Const(element_3), + return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), Const(element_4)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index c2c879a6c..6e17d1c7e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -7,10 +7,10 @@ namespace Shader::Backend::SPIRV { namespace { Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; if (index_offset > 0) { - index = ctx.OpIAdd(ctx.U32[1], index, ctx.Constant(ctx.U32[1], index_offset)); + index = ctx.OpIAdd(ctx.U32[1], index, 
ctx.Const(index_offset)); } return ctx.profile.support_explicit_workgroup_layout ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) @@ -20,14 +20,14 @@ Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast(offset.U32() / element_size)}; - return ctx.Constant(ctx.U32[1], imm_offset); + return ctx.Const(imm_offset); } const u32 shift{static_cast(std::countr_zero(element_size))}; const Id index{ctx.Def(offset)}; if (shift == 0) { return index; } - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } @@ -43,7 +43,7 @@ Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, } std::pair AtomicArgs(EmitContext& ctx) { - const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; + const Id scope{ctx.Const(static_cast(spv::Scope::Device))}; const Id semantics{ctx.u32_zero_value}; return {scope, semantics}; } @@ -103,13 +103,13 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { } Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); } Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); } @@ -132,7 +132,7 @@ Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { - const Id shift_id{ctx.Constant(ctx.U32[1], 3U)}; + const Id shift_id{ctx.Const(3U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 366dc6a0c..705aebd81 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -12,8 +12,7 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory}; - ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast(scope)), - ctx.Constant(ctx.U32[1], static_cast(semantics))); + ctx.OpMemoryBarrier(ctx.Const(static_cast(scope)), ctx.Const(static_cast(semantics))); } } // Anonymous namespace @@ -22,9 +21,9 @@ void EmitBarrier(EmitContext& ctx) { const auto memory{spv::Scope::Workgroup}; const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory}; - ctx.OpControlBarrier(ctx.Constant(ctx.U32[1], static_cast(execution)), - ctx.Constant(ctx.U32[1], static_cast(memory)), - 
ctx.Constant(ctx.U32[1], static_cast(memory_semantics))); + ctx.OpControlBarrier(ctx.Const(static_cast(execution)), + ctx.Const(static_cast(memory)), + ctx.Const(static_cast(memory_semantics))); } void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ed57e44a2..5cc9d0d39 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -69,7 +69,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return info.id; } else { const u32 index_element{element - info.first_element}; - const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; + const Id index_id{ctx.Const(index_element)}; return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } @@ -81,7 +81,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionZ: case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; - const Id element_id{ctx.Constant(ctx.U32[1], element)}; + const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } case IR::Attribute::ClipDistance0: @@ -94,7 +94,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::ClipDistance7: { const u32 base{static_cast(IR::Attribute::ClipDistance0)}; const u32 index{static_cast(attr) - base}; - const Id clip_num{ctx.Constant(ctx.U32[1], index)}; + const Id clip_num{ctx.Const(index)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: @@ -131,7 +131,7 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, Id index{ctx.Def(offset)}; if (element_size > 1) { const u32 log2_element_size{static_cast(std::countr_zero(element_size))}; - const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + const Id shift{ctx.Const(log2_element_size)}; index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); } const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; @@ -140,7 +140,7 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, if (offset.U32() % element_size != 0) { throw NotImplementedException("Unaligned immediate constant buffer load"); } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id imm_offset{ctx.Const(offset.U32() / element_size)}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; return ctx.OpLoad(result_type, access_chain); } @@ -212,13 +212,13 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + const auto element_id{[&] { return ctx.Const(element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; if (!type) { // Attribute is disabled - return ctx.Constant(ctx.F32[1], 0.0f); + return ctx.Const(0.0f); } const Id generic_id{ctx.input_generics.at(index)}; const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; @@ -252,20 +252,19 @@ Id EmitGetAttribute(EmitContext& ctx, 
IR::Attribute attr, Id vertex) { } case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Constant(ctx.U32[1], std::numeric_limits::max()), - ctx.u32_zero_value); + ctx.Const(std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); case IR::Attribute::TessellationEvaluationPointU: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); case IR::Attribute::TessellationEvaluationPointV: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); default: throw NotImplementedException("Read attribute {}", attr); @@ -303,7 +302,7 @@ Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { throw NotImplementedException("Non-generic patch load"); } const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; return ctx.OpLoad(ctx.F32[1], pointer); } @@ -312,7 +311,7 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { const Id pointer{[&] { if (IR::IsGeneric(patch)) { const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); } switch (patch) { @@ -321,15 +320,14 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { case IR::Patch::TessellationLodTop: case IR::Patch::TessellationLodBottom: { const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; - const Id index_id{ctx.Constant(ctx.U32[1], index)}; + const Id index_id{ctx.Const(index)}; return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); } case IR::Patch::TessellationLodInteriorU: return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.u32_zero_value); case IR::Patch::TessellationLodInteriorV: - return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, - ctx.Constant(ctx.U32[1], 1u)); + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); default: throw NotImplementedException("Patch {}", patch); } @@ -338,7 +336,7 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { } void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { - const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id component_id{ctx.Const(component)}; const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; ctx.OpStore(pointer, value); } @@ -404,7 +402,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - return ctx.Constant(ctx.F32[1], ctx.profile.y_negate ? -1.0f : 1.0f); + return ctx.Const(ctx.profile.y_negate ? 
-1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 24300af39..97d11cc63 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -117,7 +117,7 @@ Id EmitFPLog2(EmitContext& ctx, Id value) { } Id EmitFPRecip32(EmitContext& ctx, Id value) { - return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value); + return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); } Id EmitFPRecip64(EmitContext& ctx, Id value) { @@ -143,8 +143,8 @@ Id EmitFPSaturate16(EmitContext& ctx, Id value) { } Id EmitFPSaturate32(EmitContext& ctx, Id value) { - const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; - const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; + const Id zero{ctx.Const(f32{0.0})}; + const Id one{ctx.Const(f32{1.0})}; return Clamp(ctx, ctx.F32[1], value, zero, one); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7a4388e7e..90817f161 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -45,16 +45,12 @@ public: if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](unsigned int a, unsigned int b) { - return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); - }}; + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; - const Id offsets{ - ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), - ctx.ConstantComposite(ctx.U32[2], read(0, 0), read(0, 1)), - ctx.ConstantComposite(ctx.U32[2], read(0, 2), read(0, 3)), - ctx.ConstantComposite(ctx.U32[2], read(1, 0), read(1, 1)), - ctx.ConstantComposite(ctx.U32[2], read(1, 2), read(1, 3)))}; + const Id offsets{ctx.ConstantComposite( + ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), + ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), + ctx.Const(read(1, 2), read(1, 3)))}; Add(spv::ImageOperandsMask::ConstOffsets, offsets); } @@ -108,7 +104,7 @@ private: return; } if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.Constant(ctx.U32[1], offset.U32())); + Add(spv::ImageOperandsMask::ConstOffset, ctx.Const(offset.U32())); return; } IR::Inst* const inst{offset.InstRecursive()}; @@ -361,9 +357,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, - ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), - operands.Span()); + ctx.F32[4], Texture(ctx, index), coords, ctx.Const(info.gather_component), + operands.Mask(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 944f1e429..c12d0a513 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -44,7 +44,7 @@ Id 
EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; - const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a8f2ea5a0..7bf828995 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -11,14 +11,14 @@ namespace { Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast(offset.U32() / element_size)}; - return ctx.Constant(ctx.U32[1], imm_offset); + return ctx.Const(imm_offset); } const u32 shift{static_cast(std::countr_zero(element_size))}; const Id index{ctx.Def(offset)}; if (shift == 0) { return index; } - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index fa2fc9ab4..710d1cd25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -7,22 +7,22 @@ namespace Shader::Backend::SPIRV { namespace { Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); } Id Word(EmitContext& ctx, Id offset) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { - const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; - const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; - const Id count_id{ctx.Constant(ctx.U32[1], count)}; + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; + const Id count_id{ctx.Const(count)}; return {bit, count_id}; } } // Anonymous namespace @@ -83,9 +83,9 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; return ctx.OpLoad(ctx.U32[2], pointer); } else { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; const Id 
lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), @@ -98,12 +98,11 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; return ctx.OpLoad(ctx.U32[4], pointer); } - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; std::array values{}; for (u32 i = 0; i < 4; ++i) { - const Id index{i == 0 ? base_index - : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; values[i] = ctx.OpLoad(ctx.U32[1], pointer); } @@ -134,7 +133,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { if (ctx.profile.support_explicit_workgroup_layout) { pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); } else { - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); } @@ -147,9 +146,9 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { ctx.OpStore(pointer, value); return; } - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); @@ -162,11 +161,10 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { ctx.OpStore(pointer, value); return; } - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { - const Id index{i == 0 ? base_index - : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id index{i == 0 ? 
base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index ae8b39f41..d5430e905 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -19,7 +19,7 @@ void ConvertDepthMode(EmitContext& ctx) { void SetFixedPipelinePointSize(EmitContext& ctx) { if (ctx.profile.fixed_state_point_size) { const float point_size{*ctx.profile.fixed_state_point_size}; - ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); + ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); } } @@ -75,7 +75,7 @@ void AlphaTest(EmitContext& ctx) { const Id true_label{ctx.OpLabel()}; const Id discard_label{ctx.OpLabel()}; - const Id alpha_reference{ctx.Constant(ctx.F32[1], ctx.profile.alpha_test_reference)}; + const Id alpha_reference{ctx.Const(ctx.profile.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); @@ -88,8 +88,8 @@ void AlphaTest(EmitContext& ctx) { void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB) { - const Id zero{ctx.Constant(ctx.F32[1], 0.0f)}; - const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; ctx.OpStore(ctx.output_position, default_vector); for (const auto& info : ctx.output_generics) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index a255f9ba7..239e2ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -54,7 +54,7 @@ Id EmitLaneId(EmitContext& ctx) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { return id; } - return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Constant(ctx.U32[1], 31U)); + return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); } Id EmitVoteAll(EmitContext& ctx, Id pred) { @@ -168,10 +168,10 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id } Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { - const Id three{ctx.Constant(ctx.U32[1], 3)}; + const Id three{ctx.Const(3U)}; Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); - mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1)); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); From 6325601947a523b1aea192a51ccd9e417bf0646c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:23:52 -0300 Subject: [PATCH 259/770] spirv: Fix ViewportMask --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 214ef9c25..2ffa8c453 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1135,7 +1135,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } if (info.stores_viewport_mask && profile.support_viewport_mask) { - viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt); + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, + spv::BuiltIn::ViewportMaskNV); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { From 7018e524f5e6217b3259333acc4ea09ad036d331 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:33:23 -0300 Subject: [PATCH 260/770] shader: Add NVN storage buffer fallbacks When we can't track the SSBO origin of a global memory instruction, leave it as a global memory operation and assume these pointers are in the NVN storage buffer slots, then apply a linear search in the shader's runtime. --- .../backend/spirv/emit_context.cpp | 77 +++++++++++++++++++ .../backend/spirv/emit_context.h | 8 ++ .../backend/spirv/emit_spirv.h | 16 ++-- .../backend/spirv/emit_spirv_integer.cpp | 8 +- .../backend/spirv/emit_spirv_memory.cpp | 24 +++--- .../frontend/maxwell/program.cpp | 43 +++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 53 +++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 46 ++--------- src/shader_recompiler/shader_info.h | 1 + 9 files changed, 214 insertions(+), 62 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2ffa8c453..7f16cb0dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineTextures(program.info, binding); DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); + DefineGlobalMemoryFunctions(program.info); DefineLabels(program); } @@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } } +void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { + if (!info.uses_global_memory) { + return; + } + using DefPtr = Id StorageDefinitions::*; + const Id zero{u32_zero_value}; + const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, + auto&& callback) { + AddLabel(); + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; + const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; + const Id ssbo_addr_pointer{OpAccessChain( + uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; + const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, + zero, ssbo_size_cbuf_offset)}; + + const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; + const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; + const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; + const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), + OpULessThan(U1, addr, ssbo_end))}; + const Id then_label{OpLabel()}; + const Id else_label{OpLabel()}; + OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, then_label, 
else_label); + AddLabel(then_label); + const Id ssbo_id{ssbos[index].*ssbo_member}; + const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; + const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; + const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; + callback(ssbo_pointer); + AddLabel(else_label); + } + }}; + const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(type, U64)}; + const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + define_body(ssbo_member, addr, element_pointer, shift, + [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); + OpReturnValue(ConstantNull(type)); + OpFunctionEnd(); + return func_id; + }}; + const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { + const Id function_type{TypeFunction(void_id, U64, type)}; + const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; + const Id addr{OpFunctionParameter(U64)}; + const Id data{OpFunctionParameter(type)}; + define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { + OpStore(ssbo_pointer, data); + OpReturn(); + }); + OpReturn(); + OpFunctionEnd(); + return func_id; + }}; + const auto define{ + [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { + const Id element_type{type_def.element}; + const u32 shift{static_cast(std::countr_zero(size))}; + const Id load_func{define_load(ssbo_member, element_type, type, shift)}; + const Id write_func{define_write(ssbo_member, element_type, type, shift)}; + return std::make_pair(load_func, write_func); + }}; + std::tie(load_global_func_u32, write_global_func_u32) = + define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); + std::tie(load_global_func_u32x2, write_global_func_u32x2) = + define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); + std::tie(load_global_func_u32x4, write_global_func_u32x4) = + define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index ef8507367..a4503c7ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -224,6 +224,13 @@ public: Id f32x2_min_cas{}; Id f32x2_max_cas{}; + Id load_global_func_u32{}; + Id load_global_func_u32x2{}; + Id load_global_func_u32x4{}; + Id write_global_func_u32{}; + Id write_global_func_u32x2{}; + Id write_global_func_u32x4{}; + Id input_position{}; std::array input_generics{}; @@ -255,6 +262,7 @@ private: void DefineTextures(const Info& info, u32& binding); void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); + void DefineGlobalMemoryFunctions(const Info& info); void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 67d06faa0..89a82e858 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& 
ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx); -void EmitLoadGlobal64(EmitContext& ctx); -void EmitLoadGlobal128(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx); -void EmitWriteGlobal64(EmitContext& ctx); -void EmitWriteGlobal128(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); @@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value); Id EmitFPIsNan32(EmitContext& ctx, Id value); Id EmitFPIsNan64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitIAdd64(EmitContext& ctx); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); -void EmitISub64(EmitContext& ctx); +Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c12d0a513..cd5b1f42c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return result; } -void EmitIAdd64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIAdd(ctx.U64, a, b); } Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitISub64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitISub64(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.U64, a, b); } Id EmitIMul32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 7bf828995..8849258e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitLoadGlobal32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); } -void EmitLoadGlobal64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); } -void EmitLoadGlobal128(EmitContext&) { - throw 
NotImplementedException("SPIR-V Instruction"); +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); } void EmitWriteGlobalU8(EmitContext&) { @@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitWriteGlobal32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); } -void EmitWriteGlobal64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); } -void EmitWriteGlobal128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 20a1d61cc..14180dcd9 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + continue; + } + // Assume these are written for now + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = true, + }); + } +} } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, @@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool #include +#include "common/alignment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce storage_buffer.offset < bias.offset_end; } -/// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { - IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - const IR::Value zero{u32{0}}; - switch (inst.GetOpcode()) { - case IR::Opcode::LoadGlobalS8: - case IR::Opcode::LoadGlobalU8: - case IR::Opcode::LoadGlobalS16: - case IR::Opcode::LoadGlobalU16: - case 
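The fallback is easier to read outside of SPIR-V. A minimal host-side C++ sketch of the search each emitted helper performs, with hypothetical names (the real pass emits this as SPIR-V functions that read the {address, size} pairs out of the driver constant buffer rather than running it on the CPU):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

// Hypothetical stand-in for one NVN slot: the {address, size} pair the pass
// reads out of the driver constant buffer (c0[0x110 + i * 0x10] and friends).
struct SsboDescriptor {
    std::uint64_t addr;
    std::uint32_t size;
};

// Linear search over the candidate buffers; returns the buffer index and the
// byte offset inside it, or nullopt when the address misses every slot (the
// emitted SPIR-V returns a null constant in that case).
std::optional<std::pair<std::size_t, std::uint32_t>> FindBuffer(
    const std::vector<SsboDescriptor>& ssbos, std::uint64_t addr) {
    for (std::size_t i = 0; i < ssbos.size(); ++i) {
        if (addr >= ssbos[i].addr && addr < ssbos[i].addr + ssbos[i].size) {
            return std::make_pair(i, static_cast<std::uint32_t>(addr - ssbos[i].addr));
        }
    }
    return std::nullopt;
}

int main() {
    const std::vector<SsboDescriptor> ssbos{{0x10000, 0x1000}, {0x20000, 0x800}};
    if (const auto hit{FindBuffer(ssbos, 0x20010)}) {
        std::printf("buffer %zu, offset %u\n", hit->first, hit->second); // buffer 1, offset 16
    }
}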
IR::Opcode::LoadGlobal32: - inst.ReplaceUsesWith(zero); - break; - case IR::Opcode::LoadGlobal64: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); - break; - case IR::Opcode::LoadGlobal128: - inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); - break; - case IR::Opcode::WriteGlobalS8: - case IR::Opcode::WriteGlobalU8: - case IR::Opcode::WriteGlobalS16: - case IR::Opcode::WriteGlobalU16: - case IR::Opcode::WriteGlobal32: - case IR::Opcode::WriteGlobal64: - case IR::Opcode::WriteGlobal128: - inst.Invalidate(); - break; - default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst.GetOpcode()); - } -} - struct LowAddrInfo { IR::U32 value; s32 imm_offset; @@ -350,6 +318,10 @@ std::optional Track(const IR::Value& value, const Bias* bias) .index{index.U32()}, .offset{offset.U32()}, }; + if (!Common::IsAligned(storage_buffer.offset, 16)) { + // The SSBO pointer has to be aligned + return std::nullopt; + } if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly // point to them @@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { - DiscardGlobalMemory(block, inst); + // Failed to track the low address, use NVN fallbacks return; } // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; - std::optional storage_buffer{Track(low_addr, &nvn_bias)}; + std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { // If it fails, track without a bias storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { - // If that also failed, drop the global memory usage - // LOG_ERROR - DiscardGlobalMemory(block, inst); + // If that also fails, use NVN fallbacks return; } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f808adeba..50b4d1c05 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -162,6 +162,7 @@ struct Info { bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; bool uses_int64_bit_atomics{}; + bool uses_global_memory{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; From 469f8bb85750792cf35d5a5aa72fa5a73cf33c82 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:36:17 -0300 Subject: [PATCH 261/770] shader: Simplify code for local memory --- .../translate/impl/load_store_local_shared.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index e24b49721..20df163f2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -34,6 +34,15 @@ IR::U32 Offset(TranslatorVisitor& v, u64 insn) { } } +std::pair WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + std::pair GetSize(u64 insn) { union { u64 raw; @@ -79,9 +88,7 @@ IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& 
offset) { } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { @@ -133,9 +140,7 @@ void TranslatorVisitor::LDS(u64 insn) { } void TranslatorVisitor::STL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first}; From a8d46a5eae4e0d65c08f6574cffc48bc55d00940 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:36:57 -0300 Subject: [PATCH 262/770] shader: Add constant propagation for arithmetic right shifts --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7e86f64a8..2206f93c2 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -482,6 +482,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::IMul32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); return; + case IR::Opcode::ShiftRightArithmetic32: + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast(a >> b); }); + return; case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: From dd860b684c7695097107c1186e96a70e754e5990 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 19:48:45 -0300 Subject: [PATCH 263/770] shader: Implement D3D samplers --- src/shader_recompiler/environment.h | 2 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 80 ++++++++++++++++--- src/shader_recompiler/shader_info.h | 6 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 31 ++++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 25 ++++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 32 +++----- 6 files changed, 127 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1c50ae51e..090bc1c08 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -17,7 +17,7 @@ public: [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; - [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e1d5a2ce1..5ac485522 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,9 @@ namespace { struct ConstBufferAddr { u32 index; u32 offset; + u32 secondary_index; + u32 secondary_offset; + bool has_secondary; }; struct TextureInst { @@ -109,9 +112,38 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } +std::optional TryGetConstBuffer(const IR::Inst* inst); + +std::optional Track(const IR::Value& value) { + return 
IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + switch (inst->GetOpcode()) { + default: return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32: + break; } const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; @@ -127,13 +159,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { return ConstBufferAddr{ .index{index.U32()}, .offset{offset.U32()}, + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } -std::optional Track(const IR::Value& value) { - return IR::BreadthFirstSearch(value, TryGetConstBuffer); -} - TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { @@ -146,6 +177,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = ConstBufferAddr{ .index = env.TextureBoundBuffer(), .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } return TextureInst{ @@ -155,6 +189,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ?
cbuf.secondary_offset : cbuf.offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -167,8 +209,11 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + return desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -181,8 +226,12 @@ public: u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -247,14 +296,14 @@ void TexturePass(Environment& env, IR::Program& program) { auto flags{inst->Flags()}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: - flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + flags.type.Assign(ReadTextureType(env, cbuf)); inst->SetFlags(flags); break; case IR::Opcode::ImageFetch: if (flags.type != TextureType::Color1D) { break; } - if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { // Replace with the bound texture type only when it's a texture buffer // If the instruction is 1D and the bound type is 2D, don't change the code and let // the rasterizer robustness handle it @@ -270,6 +319,9 @@ void TexturePass(Environment& env, IR::Program& program) { switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ @@ -294,16 +346,22 @@ void TexturePass(Environment& env, IR::Program& program) { default: if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 50b4d1c05..0f45bdfb6 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -61,8 +61,11
@@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { + bool has_secondary; u32 cbuf_index; u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; u32 count; }; using TextureBufferDescriptors = boost::container::small_vector; @@ -79,8 +82,11 @@ using ImageBufferDescriptors = boost::container::small_vector; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3c907ec5a..45d837ca4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -88,23 +88,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, boost::container::static_vector image_view_indices; boost::container::static_vector samplers; - const auto& launch_desc{kepler_compute.launch_description}; - const auto& cbufs{launch_desc.const_buffer_config}; - const bool via_header_index{launch_desc.linked_tsc}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d5e9dae0f..08f00b9ce 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,20 +169,31 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(cbufs[cbuf_index].enabled); - const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto 
read_handle{[&](const auto& desc) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9b93336b..4317b2ac7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,9 +188,7 @@ protected: } Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, - u32 cbuf_offset) { - const u32 raw{cbuf_offset < cbuf_size ? 
gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + u32 raw) { const TextureHandle handle{raw, via_header_index}; const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; @@ -219,7 +217,7 @@ protected: throw Shader::NotImplementedException("Unknown texture type"); } }()}; - texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + texture_types.emplace(raw, result); return result; } @@ -227,7 +225,7 @@ protected: GPUVAddr program_base{}; std::vector code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; u32 local_memory_size{}; @@ -250,7 +248,7 @@ using Shader::Maxwell::TranslateProgram; // TODO: Move this to a separate file constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{1}; +constexpr u32 CACHE_VERSION{2}; class GraphicsEnvironment final : public GenericEnvironment { public: @@ -308,13 +306,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{maxwell3d->regs}; - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, - cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); } private: @@ -352,13 +347,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, - cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); } private: @@ -421,7 +413,7 @@ public: code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { - u64 key; + u32 key; Shader::TextureType type; file.read(reinterpret_cast(&key), sizeof(key)) .read(reinterpret_cast(&type), sizeof(type)); @@ -457,8 +449,8 @@ public: return it->second; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + Shader::TextureType ReadTextureType(u32 handle) override { + const auto it{texture_types.find(handle)}; if (it == texture_types.end()) { throw Shader::LogicError("Uncached read texture type"); } @@ -483,7 +475,7 @@ public: private: std::unique_ptr code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; From 79c2e43fcd5a254121d48e6957ac159041c4fac0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:20:46 -0300 Subject: [PATCH 264/770] shader: Calculate number of arguments in an opcode at compile time --- src/shader_recompiler/frontend/ir/opcodes.cpp | 15 ++++++++++++--- 1 
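The separate-sampler handling above reduces to OR-ing two constant buffer words into one combined handle before the TIC/TSC lookup. A sketch under the usual Maxwell packing, TIC index in the low 20 bits and TSC index in the upper 12, using a simplified stand-in for Tegra::Texture::TextureHandle:

#include <cassert>
#include <cstdint>

// Simplified stand-in for Tegra::Texture::TextureHandle: TIC (texture) index
// in the low 20 bits, TSC (sampler) index in the upper 12 bits of one word.
struct TextureHandle {
    explicit TextureHandle(std::uint32_t raw)
        : image{raw & 0xfffff}, sampler{raw >> 20} {}
    std::uint32_t image;
    std::uint32_t sampler;
};

int main() {
    // D3D-style shaders keep the two halves in separate cbuf words; OR-ing
    // them reconstructs the combined handle the hardware expects.
    const std::uint32_t texture_word{0x0000002a}; // TIC index 42, sampler bits clear
    const std::uint32_t sampler_word{0x00700000}; // TSC index 7, texture bits clear
    const TextureHandle handle{texture_word | sampler_word};
    assert(handle.image == 42 && handle.sampler == 7);
}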
file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7f04b647b..4207d548c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -57,6 +57,17 @@ constexpr std::array META_TABLE{ #undef OPCODE }; +constexpr size_t CalculateNumArgsOf(Opcode op) { + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); +} + +constexpr std::array NUM_ARGS{ +#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), +#include "opcodes.inc" +#undef OPCODE +}; + void ValidateOpcode(Opcode op) { const size_t raw{static_cast(op)}; if (raw >= META_TABLE.size()) { @@ -72,9 +83,7 @@ Type TypeOf(Opcode op) { size_t NumArgsOf(Opcode op) { ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; - return static_cast(distance); + return NUM_ARGS[static_cast(op)]; } Type ArgTypeOf(Opcode op, size_t arg_index) { From 24cc29866036350e538100a64ea8bcd1bfba1468 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:21:16 -0300 Subject: [PATCH 265/770] shader: Use a small_vector for phi blocks --- src/shader_recompiler/frontend/ir/microinstruction.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 97dc91d85..dc9f683fe 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "common/bit_cast.h" @@ -131,7 +132,7 @@ private: u32 definition{}; union { NonTriviallyDummy dummy{}; - std::vector> phi_args; + boost::container::small_vector, 2> phi_args; std::array args; }; std::unique_ptr associated_insts; From 4bbe5303376e693d15d7de80b25f5fda783281ce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:28:06 -0300 Subject: [PATCH 266/770] shader: Inline common IR::Block methods --- .../frontend/ir/basic_block.cpp | 12 ------------ .../frontend/ir/basic_block.h | 19 +++++++++++++------ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e1f0191f4..f92fc2571 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -69,24 +69,12 @@ u32 Block::LocationEnd() const noexcept { return location_end; } -Block::InstructionList& Block::Instructions() noexcept { - return instructions; -} - -const Block::InstructionList& Block::Instructions() const noexcept { - return instructions; -} - void Block::AddImmediatePredecessor(Block* block) { if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { imm_predecessors.push_back(block); } } -std::span Block::ImmediatePredecessors() const noexcept { - return imm_predecessors; -} - static std::string BlockToIndex(const std::map& block_to_index, Block* block) { if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index b14a35ec5..6a1d615d9 100644 --- 
a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -59,15 +59,22 @@ public: /// Gets the end location for this basic block. [[nodiscard]] u32 LocationEnd() const noexcept; - /// Gets a mutable reference to the instruction list for this basic block. - [[nodiscard]] InstructionList& Instructions() noexcept; - /// Gets an immutable reference to the instruction list for this basic block. - [[nodiscard]] const InstructionList& Instructions() const noexcept; - /// Adds a new immediate predecessor to this basic block. void AddImmediatePredecessor(Block* block); + + /// Gets a mutable reference to the instruction list for this basic block. + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } + /// Gets an immutable reference to the instruction list for this basic block. + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } + /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span ImmediatePredecessors() const noexcept { + return imm_predecessors; + } /// Intrusively store the host definition of this instruction. template From 6944cabb899c4367a63cde97ae2bc2eb1a0fb790 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:25:46 -0300 Subject: [PATCH 267/770] shader: Inline common Opcode and Inst functions --- .../frontend/ir/microinstruction.cpp | 18 ---- .../frontend/ir/microinstruction.h | 13 ++- src/shader_recompiler/frontend/ir/opcodes.cpp | 90 +------------------ src/shader_recompiler/frontend/ir/opcodes.h | 74 ++++++++++++++- 4 files changed, 83 insertions(+), 112 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 7555ac00a..41f9fa0cd 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -221,28 +221,10 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } } -size_t Inst::NumArgs() const { - return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); -} - IR::Type Inst::Type() const { return TypeOf(op); } -Value Inst::Arg(size_t index) const { - if (op == Opcode::Phi) { - if (index >= phi_args.size()) { - throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); - } - return phi_args[index].second; - } else { - if (index >= NumArgsOf(op)) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); - } - return args[index]; - } -} - void Inst::SetArg(size_t index, Value value) { if (index >= NumArgs()) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index dc9f683fe..ea55fc29c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -73,10 +73,19 @@ public: [[nodiscard]] IR::Type Type() const; /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const; + [[nodiscard]] size_t NumArgs() const { + return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } /// Get the value of a given argument index. 
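The compile-time argument count relies on the argument arrays being padded with Type::Void. A freestanding sketch of the same trick with a two-opcode table (the enum and table contents are illustrative, not the real opcode list):

#include <algorithm>
#include <array>
#include <cstddef>

enum class Type { Void, U32, F32 };

// Illustrative table: argument lists are fixed-size arrays padded with
// Type::Void, so the argument count is the distance to the first Void.
constexpr std::array<std::array<Type, 4>, 2> ARG_TABLE{{
    {Type::U32, Type::U32, Type::Void, Type::Void}, // e.g. an integer add
    {Type::F32, Type::Void, Type::Void, Type::Void}, // e.g. a float negate
}};

// Evaluated once per opcode at compile time, turning the former runtime
// std::ranges::find into a plain table lookup.
constexpr std::size_t NumArgsOf(std::size_t op) {
    const auto& args{ARG_TABLE[op]};
    return static_cast<std::size_t>(
        std::distance(args.begin(), std::find(args.begin(), args.end(), Type::Void)));
}

static_assert(NumArgsOf(0) == 2);
static_assert(NumArgsOf(1) == 1);

int main() {}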
- [[nodiscard]] Value Arg(size_t index) const; + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + /// Set the value of a given argument index. void SetArg(size_t index, Value value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 4207d548c..24d024ad7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -2,102 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include #include -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/opcodes.h" namespace Shader::IR { -namespace { -struct OpcodeMeta { - std::string_view name; - Type type; - std::array arg_types; -}; - -// using enum Type; -constexpr Type Void{Type::Void}; -constexpr Type Opaque{Type::Opaque}; -constexpr Type Label{Type::Label}; -constexpr Type Reg{Type::Reg}; -constexpr Type Pred{Type::Pred}; -constexpr Type Attribute{Type::Attribute}; -constexpr Type Patch{Type::Patch}; -constexpr Type U1{Type::U1}; -constexpr Type U8{Type::U8}; -constexpr Type U16{Type::U16}; -constexpr Type U32{Type::U32}; -constexpr Type U64{Type::U64}; -constexpr Type F16{Type::F16}; -constexpr Type F32{Type::F32}; -constexpr Type F64{Type::F64}; -constexpr Type U32x2{Type::U32x2}; -constexpr Type U32x3{Type::U32x3}; -constexpr Type U32x4{Type::U32x4}; -constexpr Type F16x2{Type::F16x2}; -constexpr Type F16x3{Type::F16x3}; -constexpr Type F16x4{Type::F16x4}; -constexpr Type F32x2{Type::F32x2}; -constexpr Type F32x3{Type::F32x3}; -constexpr Type F32x4{Type::F32x4}; -constexpr Type F64x2{Type::F64x2}; -constexpr Type F64x3{Type::F64x3}; -constexpr Type F64x4{Type::F64x4}; - -constexpr std::array META_TABLE{ -#define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ - .name{#name_token}, \ - .type = type_token, \ - .arg_types{__VA_ARGS__}, \ - }, -#include "opcodes.inc" -#undef OPCODE -}; - -constexpr size_t CalculateNumArgsOf(Opcode op) { - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); -} - -constexpr std::array NUM_ARGS{ -#define OPCODE(name_token, type_token, ...) 
CalculateNumArgsOf(Opcode::name_token), -#include "opcodes.inc" -#undef OPCODE -}; - -void ValidateOpcode(Opcode op) { - const size_t raw{static_cast(op)}; - if (raw >= META_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", raw); - } -} -} // Anonymous namespace - -Type TypeOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].type; -} - -size_t NumArgsOf(Opcode op) { - ValidateOpcode(op); - return NUM_ARGS[static_cast(op)]; -} - -Type ArgTypeOf(Opcode op, size_t arg_index) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { - throw InvalidArgument("Out of bounds argument"); - } - return arg_types[arg_index]; -} std::string_view NameOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].name; + return Detail::META_TABLE[static_cast(op)].name; } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 999fb2e77..b5697c7f9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include @@ -18,14 +20,80 @@ enum class Opcode { #undef OPCODE }; +namespace Detail { + +struct OpcodeMeta { + std::string_view name; + Type type; + std::array arg_types; +}; + +// using enum Type; +constexpr Type Void{Type::Void}; +constexpr Type Opaque{Type::Opaque}; +constexpr Type Label{Type::Label}; +constexpr Type Reg{Type::Reg}; +constexpr Type Pred{Type::Pred}; +constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; +constexpr Type U1{Type::U1}; +constexpr Type U8{Type::U8}; +constexpr Type U16{Type::U16}; +constexpr Type U32{Type::U32}; +constexpr Type U64{Type::U64}; +constexpr Type F16{Type::F16}; +constexpr Type F32{Type::F32}; +constexpr Type F64{Type::F64}; +constexpr Type U32x2{Type::U32x2}; +constexpr Type U32x3{Type::U32x3}; +constexpr Type U32x4{Type::U32x4}; +constexpr Type F16x2{Type::F16x2}; +constexpr Type F16x3{Type::F16x3}; +constexpr Type F16x4{Type::F16x4}; +constexpr Type F32x2{Type::F32x2}; +constexpr Type F32x3{Type::F32x3}; +constexpr Type F32x4{Type::F32x4}; +constexpr Type F64x2{Type::F64x2}; +constexpr Type F64x3{Type::F64x3}; +constexpr Type F64x4{Type::F64x4}; + +constexpr std::array META_TABLE{ +#define OPCODE(name_token, type_token, ...) \ + OpcodeMeta{ \ + .name{#name_token}, \ + .type = type_token, \ + .arg_types{__VA_ARGS__}, \ + }, +#include "opcodes.inc" +#undef OPCODE +}; + +constexpr size_t CalculateNumArgsOf(Opcode op) { + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); +} + +constexpr std::array NUM_ARGS{ +#define OPCODE(name_token, type_token, ...) 
CalculateNumArgsOf(Opcode::name_token), +#include "opcodes.inc" +#undef OPCODE +}; +} // namespace Detail + /// Get return type of an opcode -[[nodiscard]] Type TypeOf(Opcode op); +[[nodiscard]] inline Type TypeOf(Opcode op) noexcept { + return Detail::META_TABLE[static_cast(op)].type; +} /// Get the number of arguments an opcode accepts -[[nodiscard]] size_t NumArgsOf(Opcode op); +[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { + return Detail::NUM_ARGS[static_cast(op)]; +} /// Get the required type of an argument of an opcode -[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); +[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept { + return Detail::META_TABLE[static_cast(op)].arg_types[arg_index]; +} /// Get the name of an opcode [[nodiscard]] std::string_view NameOf(Opcode op); From 420982864634a5e52cea42c43f8623f75483fbcc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:27:55 -0300 Subject: [PATCH 268/770] shader: Intrusively store register values in block for SSA pass --- .../frontend/ir/basic_block.h | 10 +++ .../ir_opt/ssa_rewrite_pass.cpp | 64 +++++++++++++------ 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 6a1d615d9..3a4230755 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -101,6 +101,13 @@ public: return branch_false; } + void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { + ssa_reg_values[RegIndex(reg)] = value; + } + const Value& SsaRegValue(IR::Reg reg) const noexcept { + return ssa_reg_values[RegIndex(reg)]; + } + [[nodiscard]] bool empty() const { return instructions.empty(); } @@ -182,6 +189,9 @@ private: /// Block immediate predecessors std::vector imm_predecessors; + /// Intrusively store the value of a register in the block. + std::array ssa_reg_values; + /// Intrusively stored host definition of this block. 
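The intrusive SSA storage replaces a per-variable hash map keyed by block pointer with a plain array inside each block, so the hottest def-table lookups become O(1) with no hashing. A reduced sketch with stand-ins for IR::Block and IR::Value:

#include <array>
#include <cassert>
#include <cstddef>

// Stand-ins for IR::Value and IR::Block. An empty Value plays the role of
// Value::IsEmpty() in the real pass.
struct Value {
    int inst{-1};
    bool IsEmpty() const noexcept {
        return inst < 0;
    }
};

constexpr std::size_t NUM_REGS{256}; // assumed register count, as in reg.h

struct Block {
    // Per-register definition stored directly in the block, replacing a
    // std::unordered_map<Block*, Value> per variable.
    std::array<Value, NUM_REGS> ssa_reg_values{};

    void SetSsaRegValue(std::size_t reg, const Value& value) noexcept {
        ssa_reg_values[reg] = value;
    }
    const Value& SsaRegValue(std::size_t reg) const noexcept {
        return ssa_reg_values[reg];
    }
};

int main() {
    Block block;
    assert(block.SsaRegValue(3).IsEmpty()); // no definition until written
    block.SetSsaRegValue(3, Value{42});
    assert(block.SsaRegValue(3).inst == 42);
}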
u32 definition{}; }; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ddd679e39..bb1a90004 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -57,39 +57,62 @@ using Variant = std::variant>; struct DefTable { - [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { - return regs[IR::RegIndex(variable)]; + const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + return block->SsaRegValue(variable); + } + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + block->SetSsaRegValue(variable, value); } - [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { - return preds[IR::PredIndex(variable)]; + const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)][block]; + } + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { - return goto_vars[goto_variable.index]; + const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + return goto_vars[variable.index][block]; + } + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + goto_vars[variable.index].insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { - return indirect_branch_var; + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + return indirect_branch_var[block]; + } + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + indirect_branch_var.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { - return zero_flag; + const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + return zero_flag[block]; + } + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + zero_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept { - return sign_flag; + const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + return sign_flag[block]; + } + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + sign_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept { - return carry_flag; + const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + return carry_flag[block]; + } + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + carry_flag.insert_or_assign(block, value); } - [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept { - return overflow_flag; + const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + return overflow_flag[block]; + } + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + overflow_flag.insert_or_assign(block, value); } - std::array regs; std::array preds; boost::container::flat_map goto_vars; ValueMap indirect_branch_var; @@ -143,7 +166,7 @@ class Pass { public: template void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { - current_def[variable].insert_or_assign(block, value); + current_def.SetDef(block, variable, value); } template @@ -170,9 +193,8 @@ public: IR::Block* const block{stack.back().block}; switch (stack.back().pc) { case Status::Start: { - const ValueMap& 
def{current_def[variable]}; - if (const auto it{def.find(block)}; it != def.end()) { - stack.back().result = it->second; + if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { + stack.back().result = def; } else if (!sealed_blocks.contains(block)) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; From e4d1122082e74410baac6677c850fea1a0be5c52 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:08 -0300 Subject: [PATCH 269/770] shader: Move siblings check to a separate function and comment them out --- .../maxwell/structured_control_flow.cpp | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e63e25aa6..6021ac891 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -304,6 +304,23 @@ bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { return false; } +bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -353,22 +370,10 @@ private: } } } - // TODO: Remove this - { - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); - } - } + // Expensive operation: + // if (!AreSiblings(goto_stmt, label_stmt)) { + // throw LogicError("Goto is not a sibling with the label"); + // } // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: [PATCH 270/770] shader: Move microinstruction header to the value header --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/spirv/emit_spirv.cpp | 1 - .../backend/spirv/emit_spirv.h | 1 - .../frontend/ir/basic_block.h | 1 - .../frontend/ir/breadth_first_search.h | 1 - .../frontend/ir/microinstruction.cpp | 2 +- .../frontend/ir/microinstruction.h | 162 ------------------ src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/ir/value.cpp | 1 - src/shader_recompiler/frontend/ir/value.h | 151 +++++++++++++++- .../maxwell/structured_control_flow.h | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 2 +- .../ir_opt/dead_code_elimination_pass.cpp | 2 +- .../global_memory_to_storage_buffer_pass.cpp | 2 +- .../ir_opt/identity_removal_pass.cpp | 2 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- .../ir_opt/ssa_rewrite_pass.cpp | 2 +- .../ir_opt/verification_pass.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.h | 2 +- 20 files changed, 162 insertions(+), 181 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h diff --git a/src/shader_recompiler/CMakeLists.txt 
b/src/shader_recompiler/CMakeLists.txt index 07963a760..3d7506de2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -36,7 +36,6 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp - frontend/ir/microinstruction.h frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5d6fdeb65..815b3cd95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -10,7 +10,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89a82e858..4562db45b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,7 +8,6 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3a4230755..ab7ddb3d5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -13,7 +13,6 @@ #include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h index b35f062d4..a52ccbd58 100644 --- a/src/shader_recompiler/frontend/ir/breadth_first_search.h +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -10,7 +10,6 @@ #include -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 41f9fa0cd..701746a0c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -6,8 +6,8 @@ #include #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { namespace { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h deleted file mode 100644 index ea55fc29c..000000000 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include "common/bit_cast.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcodes.h" -#include "shader_recompiler/frontend/ir/type.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::IR { - -class Block; - -struct AssociatedInsts; - -class Inst : public boost::intrusive::list_base_hook<> { -public: - explicit Inst(Opcode op_, u32 flags_) noexcept; - ~Inst(); - - Inst& operator=(const Inst&) = delete; - Inst(const Inst&) = delete; - - Inst& operator=(Inst&&) = delete; - Inst(Inst&&) = delete; - - /// Get the number of uses this instruction has. - [[nodiscard]] int UseCount() const noexcept { - return use_count; - } - - /// Determines whether this instruction has uses or not. - [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; - } - - /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode GetOpcode() const noexcept { - return op; - } - - /// Determines if there is a pseudo-operation associated with this instruction. - [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { - return associated_insts != nullptr; - } - - /// Determines whether or not this instruction may have side effects. - [[nodiscard]] bool MayHaveSideEffects() const noexcept; - - /// Determines whether or not this instruction is a pseudo-instruction. - /// Pseudo-instructions depend on their parent instructions for their semantics. - [[nodiscard]] bool IsPseudoInstruction() const noexcept; - - /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const; - - /// Gets a pseudo-operation associated with this instruction - [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - - /// Get the type this instruction returns. - [[nodiscard]] IR::Type Type() const; - - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const { - return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); - } - - /// Get the value of a given argument index. - [[nodiscard]] Value Arg(size_t index) const noexcept { - if (op == Opcode::Phi) { - return phi_args[index].second; - } else { - return args[index]; - } - } - - /// Set the value of a given argument index. - void SetArg(size_t index, Value value); - - /// Get a pointer to the block of a phi argument. - [[nodiscard]] Block* PhiBlock(size_t index) const; - /// Add phi operand to a phi instruction. - void AddPhiOperand(Block* predecessor, const Value& value); - - void Invalidate(); - void ClearArgs(); - - void ReplaceUsesWith(Value replacement); - - void ReplaceOpcode(IR::Opcode opcode); - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] FlagsType Flags() const noexcept { - FlagsType ret; - std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); - return ret; - } - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] void SetFlags(FlagsType value) noexcept { - std::memcpy(&flags, &value, sizeof(value)); - } - - /// Intrusively store the host definition of this instruction. - template - void SetDefinition(DefinitionType def) { - definition = Common::BitCast(def); - } - - /// Return the intrusively stored host definition of this instruction. 
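The Flags accessors shown here round-trip small trivially copyable structs through the 32-bit flags field with std::memcpy, which is well-defined type punning where a reinterpret_cast would not be. A self-contained sketch, with FpControl as a simplified stand-in for the modifier structs in modifiers.h:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <type_traits>

// Simplified stand-in for the modifier structs; anything trivially copyable
// and no larger than u32 can ride in the flags field.
struct FpControl {
    std::uint8_t rounding{};
    bool no_contraction{};
};
static_assert(std::is_trivially_copyable_v<FpControl>);
static_assert(sizeof(FpControl) <= sizeof(std::uint32_t));

class Inst {
public:
    template <typename FlagsType>
    FlagsType Flags() const noexcept {
        FlagsType ret;
        std::memcpy(&ret, &flags, sizeof(ret)); // well-defined type punning
        return ret;
    }
    template <typename FlagsType>
    void SetFlags(FlagsType value) noexcept {
        std::memcpy(&flags, &value, sizeof(value));
    }

private:
    std::uint32_t flags{};
};

int main() {
    Inst inst;
    inst.SetFlags(FpControl{.rounding = 2, .no_contraction = true});
    assert(inst.Flags<FpControl>().rounding == 2);
    assert(inst.Flags<FpControl>().no_contraction);
}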
- template - [[nodiscard]] DefinitionType Definition() const noexcept { - return Common::BitCast(definition); - } - -private: - struct NonTriviallyDummy { - NonTriviallyDummy() noexcept {} - }; - - void Use(const Value& value); - void UndoUse(const Value& value); - - IR::Opcode op{}; - int use_count{}; - u32 flags{}; - u32 definition{}; - union { - NonTriviallyDummy dummy{}; - boost::container::small_vector, 2> phi_args; - std::array args; - }; - std::unique_ptr associated_insts; -}; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); - -struct AssociatedInsts { - union { - Inst* in_bounds_inst; - Inst* sparse_inst; - Inst* zero_inst{}; - }; - Inst* sign_inst{}; - Inst* carry_inst{}; - Inst* overflow_inst{}; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 89a17fb1b..3fc06f855 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -8,8 +8,8 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index bf5f8c0c2..a8a919e0e 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 303745563..d90a68b37 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -4,19 +4,34 @@ #pragma once +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { class Block; class Inst; +struct AssociatedInsts; + class Value { public: Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} @@ -101,6 +116,140 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. + [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. 
+ [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); + return ret; + } + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. 
+ template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector, 2> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* in_bounds_inst; + Inst* sparse_inst; + Inst* zero_inst{}; + }; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; +}; + using U1 = TypedValue; using U8 = TypedValue; using U16 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index e4797291e..a6be12ba2 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,7 +11,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cccf0909d..bb4aeb57c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 2206f93c2..770d3de61 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -11,7 +11,7 @@ #include "common/bit_util.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 8ad59f42e..f9c5334b5 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f294d297f..87eca2a0d 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -15,7 +15,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 38af72dfe..6afbe24f7 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 62e73d52d..773e1f961 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bb1a90004..fe86a164b 100644 --- 
a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,10 +23,10 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index dbec96d84..62bf5f8ff 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -6,7 +6,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8b6839966..e12e4422f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -16,7 +16,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" From c84bbd9e44e34bba0e602c1a6736535aa531445c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 02:42:36 -0300 Subject: [PATCH 271/770] shader: Inline common Value functions into the header --- src/shader_recompiler/frontend/ir/value.cpp | 19 ----------------- src/shader_recompiler/frontend/ir/value.h | 23 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index a8a919e0e..c021d3fa9 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,25 +33,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} -bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; -} - -bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; -} - -bool Value::IsEmpty() const noexcept { - return type == Type::Void; -} - -bool Value::IsImmediate() const noexcept { - if (IsIdentity()) { - return inst->Arg(0).IsImmediate(); - } - return type != Type::Opaque; -} - bool Value::IsLabel() const noexcept { return type == Type::Label; } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index d90a68b37..5425e42a1 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -264,4 +264,27 @@ using U16U32U64 = TypedValue; using F16F32F64 = TypedValue; using UAny = TypedValue; +inline bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; +} + +inline bool Value::IsPhi() 
const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } + +inline bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline bool Value::IsImmediate() const noexcept { + IR::Type current_type{type}; + const IR::Inst* current_inst{inst}; + while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { + const Value& arg{current_inst->Arg(0)}; + current_type = arg.type; + current_inst = arg.inst; + } + return current_type != Type::Opaque; +} + } // namespace Shader::IR From f66851e37682ce538172b0945908227ada8d21ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 02:43:44 -0300 Subject: [PATCH 272/770] shader: Use memset to reset instruction arguments --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 +++- src/shader_recompiler/frontend/ir/value.h | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 701746a0c..e3350931b 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -279,8 +279,10 @@ void Inst::ClearArgs() { if (!value.IsImmediate()) { UndoUse(value); } - value = {}; } + // Reset arguments to null + // std::memset was measured to be faster on MSVC than std::ranges::fill + std::memset(&args, 0, sizeof(args)); } } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 5425e42a1..7b20824ed 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -34,7 +34,7 @@ struct AssociatedInsts; class Value { public: - Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} + Value() noexcept = default; explicit Value(IR::Inst* value) noexcept; explicit Value(IR::Block* value) noexcept; explicit Value(IR::Reg value) noexcept; @@ -78,9 +78,9 @@ public: private: void ValidateAccess(IR::Type expected) const; - IR::Type type; + IR::Type type{}; union { - IR::Inst* inst; + IR::Inst* inst{}; IR::Block* label; IR::Reg reg; IR::Pred pred; @@ -95,6 +95,7 @@ private: f64 imm_f64; }; }; +static_assert(static_cast<size_t>(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); static_assert(std::is_trivially_copyable_v<Value>); template <IR::Type type_> From cc0fcd1b8d3080ae83709874e1d66f9e4cf3f1be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 03:39:35 -0300 Subject: [PATCH 273/770] shader: Improve goto removal algorithm complexity Find the sibling node containing a nephew by searching from the nephew itself instead of from the uncle.
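To make the complexity claim concrete: the removed Offset()/SearchNode() pair re-walked the whole statement tree for every goto/label comparison, while SiblingFromNephew() only climbs parent pointers from the nephew up to the child of the shared parent, so each NeedsLift() query drops from O(tree size) to O(depth). Below is a minimal, self-contained sketch of the same idea. It is not the patch's code: the Stmt layout is a simplified assumption (a std::list tree rather than the patch's boost::intrusive Statement lists), the shared parent is passed directly instead of being derived from the uncle's up pointer, and the O(1) Tree::s_iterator_to lookup is approximated with a linear scan over one sibling list:

#include <cassert>
#include <list>

// Simplified stand-in for the patch's Statement node: children are owned
// in a list and every node knows its parent.
struct Stmt {
    Stmt* up{nullptr};
    std::list<Stmt> children;
};

// Climb parent pointers from 'nephew' until reaching the node that sits
// directly under 'parent'; cost is bounded by tree depth, not tree size.
std::list<Stmt>::iterator SiblingFromNephew(Stmt& parent, Stmt& nephew) {
    Stmt* it = &nephew;
    while (it->up != &parent) {
        it = it->up;
    }
    // boost::intrusive recovers an iterator from a node address in O(1)
    // via s_iterator_to; plain std::list needs this scan instead.
    for (auto child = parent.children.begin(); child != parent.children.end(); ++child) {
        if (&*child == it) {
            return child;
        }
    }
    assert(false && "nephew is not below parent");
    return parent.children.end();
}

// Mirrors the patch's AreOrdered: 'left' precedes 'right' iff scanning
// forward from 'right' never meets 'left' before the list ends.
bool AreOrdered(const std::list<Stmt>& siblings, std::list<Stmt>::const_iterator left,
                std::list<Stmt>::const_iterator right) {
    for (auto it = right; it != siblings.end(); ++it) {
        if (it == left) {
            return false;
        }
    }
    return true;
}

int main() {
    Stmt root;
    root.children.emplace_back(); // sibling A (say it holds the goto)
    root.children.emplace_back(); // sibling B (contains the label)
    Stmt& a = root.children.front();
    Stmt& b = root.children.back();
    a.up = &root;
    b.up = &root;
    b.children.emplace_back(); // the "nephew" nested under B
    b.children.front().up = &b;

    const auto sibling = SiblingFromNephew(root, b.children.front());
    assert(&*sibling == &b);
    // A comes before B: the goto jumps forward, so no lift is needed.
    assert(AreOrdered(root.children, root.children.cbegin(), sibling));
}

The patch's NeedsLift() is then just this pair composed: find the goto's sibling that contains the label and check their relative order, without ever materializing a global preorder offset.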
--- .../maxwell/structured_control_flow.cpp | 77 +++++++------------ 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 6021ac891..b85b613f3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -222,27 +222,6 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { return ret; } -bool HasNode(const Tree& tree, ConstNode stmt) { - const auto end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { - return true; - } - } - return false; -} - -Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { - const ConstNode label_stmt{goto_stmt->label}; - const ConstNode end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { - return it; - } - } - throw LogicError("Lift label not in tree"); -} - void SanitizeNoBreaks(const Tree& tree) { if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { throw NotImplementedException("Capturing statement with break nodes"); @@ -288,22 +267,6 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { - ++offset; - - const auto end = tree.end(); - for (ConstNode it = tree.begin(); it != end; ++it) { - ++offset; - if (stmt == it) { - return true; - } - if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { - return true; - } - } - return false; -} - bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { @@ -321,6 +284,30 @@ bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { return false; } +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -358,7 +345,7 @@ private: --goto_level; } } else { // Level(goto_stmt) < Level(label_stmt) - if (Offset(goto_stmt) > Offset(label_stmt)) { + if (NeedsLift(goto_stmt, label_stmt)) { // Lift goto_stmt to above stmt containing label_stmt using goto-lifting // transformations goto_stmt = Lift(goto_stmt); @@ -378,7 +365,7 @@ private: if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it goto_stmt->up->children.erase(goto_stmt); - } else if (Offset(goto_stmt) < Offset(label_stmt)) { + } else if (AreOrdered(goto_stmt, label_stmt)) { // Eliminate goto_stmt with a conditional EliminateAsConditional(goto_stmt, label_stmt); } else { @@ -523,8 +510,8 @@ private: [[nodiscard]] Node MoveInward(Node goto_stmt) { Statement* const parent{goto_stmt->up}; Tree& 
body{parent->children}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; const u32 label_id{label->id}; Statement* const goto_cond{goto_stmt->cond}; @@ -562,7 +549,7 @@ private: Tree& body{parent->children}; const Node label{goto_stmt->label}; const u32 label_id{label->id}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -627,14 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - size_t Offset(ConstNode stmt) const { - size_t offset{0}; - if (!SearchNode(root_stmt.children, stmt, offset)) { - throw LogicError("Node not found in tree"); - } - return offset; - } - ObjectPool& inst_pool; ObjectPool& block_pool; ObjectPool& pool; From eed6da55b83327c9e3c5a135860c4b815bcca204 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:45:14 -0300 Subject: [PATCH 274/770] cmake: Link to common in shader_recompiler --- src/shader_recompiler/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3d7506de2..051e5d05a 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -175,7 +175,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) +target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) add_executable(shader_util main.cpp) target_link_libraries(shader_util PRIVATE shader_recompiler) From 23182fa59c45a88b706022c1373e307ba4636cca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:58:23 -0300 Subject: [PATCH 275/770] shader: Intrusively store in a block if it's sealed or not --- src/shader_recompiler/frontend/ir/basic_block.h | 9 +++++++++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 5 ++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ab7ddb3d5..0b0c97af6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -107,6 +107,13 @@ public: return ssa_reg_values[RegIndex(reg)]; } + void SsaSeal() noexcept { + is_ssa_sealed = true; + } + [[nodiscard]] bool IsSsaSealed() const noexcept { + return is_ssa_sealed; + } + [[nodiscard]] bool empty() const { return instructions.empty(); } @@ -190,6 +197,8 @@ private: /// Intrusively store the value of a register in the block. std::array ssa_reg_values; + /// Intrusively store if the block is sealed in the SSA pass. + bool is_ssa_sealed{false}; /// Intrusively stored host definition of this block. 
u32 definition{}; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index fe86a164b..3bab742e7 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -195,7 +195,7 @@ public: case Status::Start: { if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { stack.back().result = def; - } else if (!sealed_blocks.contains(block)) { + } else if (!block->IsSsaSealed()) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); @@ -251,7 +251,7 @@ public: std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } - sealed_blocks.insert(block); + block->SsaSeal(); } private: @@ -297,7 +297,6 @@ private: return same; } - boost::container::flat_set sealed_blocks; boost::container::flat_map> incomplete_phis; DefTable current_def; From 75dee55486cac936f3a2d68533772d23aa733b72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:59:16 -0300 Subject: [PATCH 276/770] shader: Inline common Value getters --- src/shader_recompiler/frontend/ir/value.cpp | 107 -------------------- src/shader_recompiler/frontend/ir/value.h | 104 ++++++++++++++++++- 2 files changed, 102 insertions(+), 109 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index c021d3fa9..b962f170d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -51,107 +51,6 @@ IR::Type Value::Type() const noexcept { return type; } -IR::Inst* Value::Inst() const { - ValidateAccess(Type::Opaque); - return inst; -} - -IR::Block* Value::Label() const { - ValidateAccess(Type::Label); - return label; -} - -IR::Inst* Value::InstRecursive() const { - ValidateAccess(Type::Opaque); - if (IsIdentity()) { - return inst->Arg(0).InstRecursive(); - } - return inst; -} - -IR::Value Value::Resolve() const { - if (IsIdentity()) { - return inst->Arg(0).Resolve(); - } - return *this; -} - -IR::Reg Value::Reg() const { - ValidateAccess(Type::Reg); - return reg; -} - -IR::Pred Value::Pred() const { - ValidateAccess(Type::Pred); - return pred; -} - -IR::Attribute Value::Attribute() const { - ValidateAccess(Type::Attribute); - return attribute; -} - -IR::Patch Value::Patch() const { - ValidateAccess(Type::Patch); - return patch; -} - -bool Value::U1() const { - if (IsIdentity()) { - return inst->Arg(0).U1(); - } - ValidateAccess(Type::U1); - return imm_u1; -} - -u8 Value::U8() const { - if (IsIdentity()) { - return inst->Arg(0).U8(); - } - ValidateAccess(Type::U8); - return imm_u8; -} - -u16 Value::U16() const { - if (IsIdentity()) { - return inst->Arg(0).U16(); - } - ValidateAccess(Type::U16); - return imm_u16; -} - -u32 Value::U32() const { - if (IsIdentity()) { - return inst->Arg(0).U32(); - } - ValidateAccess(Type::U32); - return imm_u32; -} - -f32 Value::F32() const { - if (IsIdentity()) { - return inst->Arg(0).F32(); - } - ValidateAccess(Type::F32); - return imm_f32; -} - -u64 Value::U64() const { - if (IsIdentity()) { - return inst->Arg(0).U64(); - } - ValidateAccess(Type::U64); - return imm_u64; -} - -f64 Value::F64() const { - if (IsIdentity()) { - return inst->Arg(0).F64(); - } - ValidateAccess(Type::F64); - return imm_f64; -} - bool Value::operator==(const Value& other) const { if (type != other.type) { return false; @@ -205,10 +104,4 @@ bool Value::operator!=(const 
Value& other) const { return !operator==(other); } -void Value::ValidateAccess(IR::Type expected) const { - if (type != expected) { - throw LogicError("Reading {} out of {}", expected, type); - } -} - } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 7b20824ed..bb7d19001 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -14,6 +14,7 @@ #include <boost/container/small_vector.hpp> #include <boost/intrusive/list.hpp> +#include "common/assert.h" #include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" @@ -76,8 +77,6 @@ public: [[nodiscard]] bool operator!=(const Value& other) const; private: - void ValidateAccess(IR::Type expected) const; - IR::Type type{}; union { IR::Inst* inst{}; @@ -288,4 +287,105 @@ inline bool Value::IsImmediate() const noexcept { return current_type != Type::Opaque; } +inline IR::Inst* Value::Inst() const { + DEBUG_ASSERT(type == Type::Opaque); + return inst; +} + +inline IR::Block* Value::Label() const { + DEBUG_ASSERT(type == Type::Label); + return label; +} + +inline IR::Inst* Value::InstRecursive() const { + DEBUG_ASSERT(type == Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +inline IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + +inline IR::Reg Value::Reg() const { + DEBUG_ASSERT(type == Type::Reg); + return reg; +} + +inline IR::Pred Value::Pred() const { + DEBUG_ASSERT(type == Type::Pred); + return pred; +} + +inline IR::Attribute Value::Attribute() const { + DEBUG_ASSERT(type == Type::Attribute); + return attribute; +} + +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + +inline bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } + DEBUG_ASSERT(type == Type::U1); + return imm_u1; +} + +inline u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } + DEBUG_ASSERT(type == Type::U8); + return imm_u8; +} + +inline u16 Value::U16() const { + if (IsIdentity()) { + return inst->Arg(0).U16(); + } + DEBUG_ASSERT(type == Type::U16); + return imm_u16; +} + +inline u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } + DEBUG_ASSERT(type == Type::U32); + return imm_u32; +} + +inline f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + DEBUG_ASSERT(type == Type::F32); + return imm_f32; +} + +inline u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } + DEBUG_ASSERT(type == Type::U64); + return imm_u64; +} + +inline f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + DEBUG_ASSERT(type == Type::F64); + return imm_f64; +} + } // namespace Shader::IR From c8f9772d6590a018665d47a165951864ff783017 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 06:10:04 -0300 Subject: [PATCH 277/770] shader: Fix gcc warnings --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 2 +- .../frontend/maxwell/structured_control_flow.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e3350931b..b424d038e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -282,7 +282,7 @@ void Inst::ClearArgs() { } // Reset arguments to
null // std::memset was measured to be faster on MSVC than std::ranges::fill - std::memset(&args, 0, sizeof(args)); + std::memset(reinterpret_cast<void*>(&args), 0, sizeof(args)); } } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b85b613f3..cc5410c6d 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -267,7 +267,7 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { if (it == label_stmt) { From 427951d6fe8a0914434b0fcf897eef67749cba9d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 20:59:51 -0400 Subject: [PATCH 278/770] shader: add missing include guard in half_floating_point_helper.h --- .../maxwell/translate/impl/half_floating_point_helper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index 24063b2ab..59da56a7e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" From 7a9dc7839876fe5b24d1c841f182e01108ba676e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 21:34:51 -0400 Subject: [PATCH 279/770] shader: Refactor atomic_operations_global_memory --- .../impl/atomic_operations_global_memory.cpp | 80 +++++++++---------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 7a32c5eb3..66f39e44e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -154,69 +154,61 @@ void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result break; } } + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const
IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if (AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} } // Anonymous namespace void TranslatorVisitor::ATOM(u64 insn) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> addr_reg; - BitField<20, 8, IR::Reg> src_reg_b; + BitField<20, 8, IR::Reg> operand_reg; BitField<49, 3, AtomSize> size; BitField<52, 4, AtomOp> op; } const atom{insn}; - - const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; - const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; - const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - IR::Value result; - - if (AtomOpNotApplicable(atom.size, atom.op)) { - result = LoadGlobal(ir, offset, atom.size); - } else if (!is_integer) { - if (atom.size == AtomSize::F32) { - result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; - result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); - } - } else if (size_64) { - result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); - } else { - result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); - } - StoreResult(*this, atom.dest_reg, result, atom.size); + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); } void TranslatorVisitor::RED(u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> src_reg_b; - BitField<8, 8, IR::Reg> addr_reg; + BitField<0, 8, IR::Reg> operand_reg; BitField<20, 3, AtomSize> size; BitField<23, 3, AtomOp> op; } const red{insn}; - - if (AtomOpNotApplicable(red.size, red.op)) { - return; - } - const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; - const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; - const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - if (!is_integer) { - if (red.size == AtomSize::F32) { - ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; - ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); - } - } else if (size_64) { - ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); - } else { - ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); - } + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); } } // namespace Shader::Maxwell From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: [PATCH 280/770] shader: Implement indexed textures --- .../backend/spirv/emit_context.cpp | 79 +++++++++------- .../backend/spirv/emit_context.h | 11 ++- .../backend/spirv/emit_spirv_image.cpp | 58 +++++++----- src/shader_recompiler/frontend/ir/modifiers.h | 17 ++-- src/shader_recompiler/frontend/ir/opcodes.inc | 24 ++--- src/shader_recompiler/ir_opt/texture_pass.cpp | 89 +++++++++++++++---- src/shader_recompiler/shader_info.h | 4 + .../renderer_vulkan/pipeline_helper.h | 50 ++++++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 46 ++++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 
63 +++++++------ 10 files changed, 284 insertions(+), 157 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7f16cb0dc..8e625f8fb 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -380,6 +380,24 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p ctx.OpFunctionEnd(); return func; } + +template +std::string NameOf(const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}{}_{:02x}x{}", prefix, desc.cbuf_index, desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}{}_{:02x}", prefix, desc.cbuf_index, desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -971,12 +989,15 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); + Name(id, NameOf(desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -992,44 +1013,41 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("imgbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - const ImageBufferDefinition def{ + Name(id, NameOf(desc, "imgbuf")); + image_buffers.push_back({ .id = id, .image_type = image_type, - }; - image_buffers.insert(image_buffers.end(), desc.count, def); + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of textures"); - } const Id image_type{ImageType(*this, desc)}; const Id sampled_type{TypeSampledImage(image_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - // TODO: Pass count info - textures.push_back(TextureDefinition{ - .id{id}, - .sampled_type{sampled_type}, - 
.image_type{image_type}, - }); - } + Name(id, NameOf(desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -1037,24 +1055,23 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { if (desc.count != 1) { - throw NotImplementedException("Array of textures"); + throw NotImplementedException("Array of images"); } const Id image_type{ImageType(*this, desc)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("img{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - images.push_back(ImageDefinition{ - .id{id}, - .image_type{image_type}, - }); - } + Name(id, NameOf(desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index a4503c7ab..c52544fb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -32,17 +32,26 @@ private: struct TextureDefinition { Id id; Id sampled_type; + Id pointer_type; Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; }; struct ImageBufferDefinition { Id id; Id image_type; + u32 count; }; struct ImageDefinition { Id id; Id image_type; + u32 count; }; struct UniformDefinitions { @@ -162,7 +171,7 @@ public: std::array cbufs{}; std::array ssbos{}; - std::vector texture_buffers; + std::vector texture_buffers; std::vector image_buffers; std::vector textures; std::vector images; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 90817f161..6008980af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -147,24 +147,31 @@ private: spv::ImageOperandsMask mask{}; }; -Id Texture(EmitContext& ctx, const IR::Value& index) { - if (index.IsImmediate()) { - const TextureDefinition def{ctx.textures.at(index.U32())}; +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { return ctx.OpLoad(def.sampled_type, def.id); } - throw NotImplementedException("Indirect texture sample"); } -Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { - if (!index.IsImmediate()) { - throw NotImplementedException("Indirect texture sample"); - } +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { if (info.type == TextureType::Buffer) { - const Id 
sampler_id{ctx.texture_buffers.at(index.U32())}; + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; return ctx.OpImage(ctx.image_buffer_type, id); } else { - const TextureDefinition def{ctx.textures.at(index.U32())}; + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } } @@ -311,7 +318,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& bias_lc, offset); return Emit(&EmitContext::OpImageSparseSampleImplicitLod, &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } else { // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as // if the lod was explicitly zero. This may change on Turing with implicit compute @@ -320,7 +327,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } } @@ -329,8 +336,8 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const auto info{inst->Flags()}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -340,7 +347,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va offset); return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -349,7 +356,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -357,15 +364,17 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, 
offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, ctx.Const(info.gather_component), + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), operands.Mask(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.Mask(), + operands.Span()); } Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, @@ -376,12 +385,12 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { const auto info{inst->Flags()}; - const Id image{TextureImage(ctx, index, info)}; + const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { @@ -405,9 +414,10 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i throw LogicError("Unspecified image type {}", info.type.Value()); } -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coords) { +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags()}; const Id zero{ctx.f32_zero_value}; - const Id sampler{Texture(ctx, index)}; + const Id sampler{Texture(ctx, info, index)}; return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), zero, zero); } @@ -418,8 +428,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 5d7efa14c..77cda1f8a 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -34,14 +34,15 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; - BitField<0, 8, TextureType> type; - BitField<8, 1, u32> is_depth; - BitField<9, 1, u32> has_bias; - BitField<10, 1, u32> has_lod_clamp; - BitField<11, 1, u32> relaxed_precision; - BitField<12, 2, u32> gather_component; - BitField<14, 2, u32> num_derivates; - BitField<16, 3, ImageFormat> 
image_format; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b6869d4e4..8f32c9e74 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -482,18 +482,18 @@ OPCODE(BoundImageGradient, F32x4, U32, OPCODE(BoundImageRead, U32x4, U32, Opaque, ) OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) -OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) -OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageRead, U32x4, U32, Opaque, ) -OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) // Warp operations OPCODE(LaneId, U32, ) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 5ac485522..cfa6b34b9 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include #include #include @@ -21,6 +22,8 @@ struct ConstBufferAddr { u32 offset; u32 secondary_index; u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; bool has_secondary; }; @@ -32,6 +35,9 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE)); + IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -131,6 +137,9 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { if (lhs->has_secondary || rhs->has_secondary) { return std::nullopt; } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } if (lhs->index > rhs->index || lhs->offset > rhs->offset) { std::swap(lhs, rhs); } @@ -139,9 +148,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { .offset = lhs->offset, .secondary_index = rhs->index, .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, .has_secondary = true, }; } + case IR::Opcode::GetCbufU32x2: case IR::Opcode::GetCbufU32: break; } @@ -152,15 +164,39 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { // but not supported here at the moment return std::nullopt; } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { return std::nullopt; } return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = base_offset, .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, .has_secondary = false, }; } @@ -179,11 +215,13 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { .offset = inst.Arg(0).U32(), .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, .has_secondary = false, }; } return TextureInst{ - .cbuf{addr}, + .cbuf = addr, .inst = &inst, .block = block, }; @@ -209,18 +247,20 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.has_secondary == existing.has_secondary && - desc.cbuf_index == existing.cbuf_index && + return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; }); } u32 Add(const ImageBufferDescriptor& desc) { return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == 
existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; }); } @@ -231,7 +271,8 @@ public: desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; }); } @@ -239,7 +280,8 @@ public: const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; return index; @@ -310,7 +352,6 @@ void TexturePass(Environment& env, IR::Program& program) { // This happens on Fire Emblem: Three Houses flags.type.Assign(TextureType::Buffer); } - inst->SetFlags(flags); break; default: break; @@ -329,7 +370,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(ImageDescriptor{ @@ -338,7 +380,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; @@ -351,7 +394,8 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(TextureDescriptor{ @@ -362,12 +406,23 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; } - inst->SetArg(0, IR::Value{index}); + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0f45bdfb6..0f28ae07b 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -67,6 +67,7 @@ struct TextureBufferDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using TextureBufferDescriptors = boost::container::small_vector; @@ -76,6 +77,7 @@ struct ImageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageBufferDescriptors = boost::container::small_vector; @@ -88,6 +90,7 @@ struct TextureDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using 
TextureDescriptors = boost::container::small_vector; @@ -98,6 +101,7 @@ struct ImageDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageDescriptors = boost::container::small_vector; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aaf9a735e..dd7d2cc0c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,28 +85,30 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + template + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; for (size_t i = 0; i < num; ++i) { bindings.push_back({ .binding = binding, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .stageFlags = stage, .pImmutableSamplers = nullptr, }); entries.push_back({ .dstBinding = binding, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .descriptorType = type, .offset = offset, .stride = sizeof(DescriptorUpdateEntry), @@ -126,21 +128,29 @@ private: inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { - image_view_ids += info.texture_buffer_descriptors.size(); - image_view_ids += info.image_buffer_descriptors.size(); + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } } for (const auto& desc : info.image_descriptors) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - if 
(desc.is_written) { - texture_cache.MarkModification(image_view.image_id); + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); } - const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 45d837ca4..6e9f66262 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -91,35 +91,41 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - desc.secondary_cbuf_offset}; + secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TextureHandle{lhs_raw | rhs_raw, via_header_index}; } } return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - samplers.push_back(sampler->Handle()); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); @@ -130,16 +136,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; 
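A note on the addressing used by the read_handle lambda above: every element of an arrayed descriptor occupies 1 << size_shift bytes of its const buffer, so element index is read from cbuf_offset plus the scaled index. A reduced, standalone sketch of that computation, with hypothetical names:

#include <cstdint>

constexpr std::uint64_t HandleAddress(std::uint64_t cbuf_base, std::uint32_t cbuf_offset,
                                      std::uint32_t index, std::uint32_t size_shift) {
    const std::uint32_t index_offset = index << size_shift; // index * descriptor size in bytes
    return cbuf_base + cbuf_offset + index_offset;
}

// With the 8-byte descriptors the texture pass emits (size_shift of 3),
// element 3 sits 24 bytes past the base handle.
static_assert(HandleAddress(0x1000, 0x20, 3, 3) == 0x1000 + 0x20 + 24);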
+ if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++texture_buffer_ids; + ++index; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); - buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format, is_written); - ++texture_buffer_ids; - ++index; }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 08f00b9ce..b7688aef9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -161,23 +161,26 @@ void GraphicsPipeline::Configure(bool is_indexed) { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++index; + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); - const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - desc.secondary_cbuf_offset}; + second_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; @@ -187,17 +190,21 @@ void GraphicsPipeline::Configure(bool is_indexed) { return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + Sampler* const 
sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); } @@ -208,24 +215,30 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++index; + ++texture_buffer_index; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written); - ++index; - ++texture_buffer_index; }}; const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsTextureBuffers(stage); std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - texture_buffer_index += info.texture_descriptors.size(); - texture_buffer_index += info.image_descriptors.size(); + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } buffer_cache.UpdateGraphicsBuffers(is_indexed); From 21e3382830e4a2b294ea8b84ffad531e752fc512 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:18:54 -0300 Subject: [PATCH 281/770] shader: Simplify code in opcodes.h to fix Intellisense Avoid using std::array to fix Intellisense not properly compiling this code and disabling itself on all files that include it. While we are at it, change the code to use u8 instead of size_t for the number of instructions in an opcode. --- src/shader_recompiler/frontend/ir/opcodes.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index b5697c7f9..2b9c0ed8c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include #include @@ -21,7 +21,6 @@ enum class Opcode { }; namespace Detail { - struct OpcodeMeta { std::string_view name; Type type; @@ -57,9 +56,9 @@ constexpr Type F64x2{Type::F64x2}; constexpr Type F64x3{Type::F64x3}; constexpr Type F64x4{Type::F64x4}; -constexpr std::array META_TABLE{ +constexpr OpcodeMeta META_TABLE[]{ #define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ + { \ .name{#name_token}, \ .type = type_token, \ .arg_types{__VA_ARGS__}, \ @@ -67,14 +66,13 @@ constexpr std::array META_TABLE{ #include "opcodes.inc" #undef OPCODE }; - constexpr size_t CalculateNumArgsOf(Opcode op) { const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); } -constexpr std::array NUM_ARGS{ -#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), +constexpr u8 NUM_ARGS[]{ +#define OPCODE(name_token, type_token, ...) 
static_cast(CalculateNumArgsOf(Opcode::name_token)), #include "opcodes.inc" #undef OPCODE }; @@ -87,7 +85,7 @@ constexpr std::array NUM_ARGS{ /// Get the number of arguments an opcode accepts [[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { - return Detail::NUM_ARGS[static_cast(op)]; + return static_cast(Detail::NUM_ARGS[static_cast(op)]); } /// Get the required type of an argument of an opcode From 0ace34575cd099fb0db955ab32c215106ef19f84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:19:14 -0300 Subject: [PATCH 282/770] shader: Require dual source blending --- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 038231298..9c609e504 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -219,7 +219,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .geometryShader = true, .tessellationShader = true, .sampleRateShading = true, - .dualSrcBlend = false, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, @@ -678,6 +678,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), From aece958c2ba5d4fe37246a6a7502d182931a7483 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:50:13 -0300 Subject: [PATCH 283/770] shader: Add missing UndoUse case for GetSparseFromOp --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b424d038e..5c1b02d53 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -364,6 +364,10 @@ void Inst::UndoUse(const Value& value) { AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); + break; case Opcode::GetInBoundsFromOp: AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); From 92a01984e6315f3c214990926c8fa5b4474ed339 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:29:59 -0300 Subject: [PATCH 284/770] shader: Remove invalidated blocks in dead code elimination pass --- .../ir_opt/dead_code_elimination_pass.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index f9c5334b5..1e4a3fdae 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -14,9 +14,12 @@ void 
DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. for (IR::Block* const block : program.post_order_blocks) { - for (IR::Inst& inst : block->Instructions() | std::views::reverse) { - if (!inst.HasUses() && !inst.MayHaveSideEffects()) { - inst.Invalidate(); + auto it{block->end()}; + while (it != block->begin()) { + --it; + if (!it->HasUses() && !it->MayHaveSideEffects()) { + it->Invalidate(); + it = block->Instructions().erase(it); } } } From 25949b864c40405946d434ecc85d6c167f323a24 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:33:49 -0300 Subject: [PATCH 285/770] shader: Fix forward referencing identity instructions when inserting phi --- .../ir_opt/ssa_rewrite_pass.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 3bab742e7..a8064a5d0 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -278,20 +278,22 @@ private: } same = op; } + // Remove the phi node from the block, it will be reinserted + IR::Block::InstructionList& list{block->Instructions()}; + list.erase(IR::Block::InstructionList::s_iterator_to(phi)); + + // Find the first non-phi instruction and use it as an insertion point + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IsPhi)}; if (same.IsEmpty()) { // The phi is unreachable or in the start block - // First remove the phi node from the block, it will be reinserted - IR::Block::InstructionList& list{block->Instructions()}; - list.erase(IR::Block::InstructionList::s_iterator_to(phi)); - - // Insert an undef instruction after all phi nodes (to keep phi instructions on top) - const auto first_not_phi{std::ranges::find_if_not(list, IsPhi)}; - same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; - - // Insert the phi node after the undef opcode, this will be replaced with an identity - list.insert(first_not_phi, phi); + // Insert an undefined instruction and make it the phi node replacement + // The "phi" node reinsertion point is specified after this instruction + reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); + same = IR::Value{&*reinsert_point}; + ++reinsert_point; } - // Reroute all uses of phi to same and remove phi + // Reinsert the phi node and reroute all its uses to the "same" value + list.insert(reinsert_point, phi); phi.ReplaceUsesWith(same); // TODO: Try to recursively remove all phi users, which might have become trivial return same; From 0c7230a606ae705a28c8a14590808d6bfd3656cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:34:34 -0300 Subject: [PATCH 286/770] shader: Add more strict validation to the pass --- .../ir_opt/verification_pass.cpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 62bf5f8ff..207355ecc 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included.
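The dead code elimination rewrite above switches from a reverse range-for to explicit iterators so that erase() can hand back the next valid position; erasing the element the range adaptor is currently referencing would invalidate the traversal. The same pattern in miniature over a std::list, purely illustrative since the real pass walks an intrusive instruction list:

#include <list>

void EraseDeadEntries(std::list<int>& values) {
    auto it = values.end();
    while (it != values.begin()) {
        --it;
        if (*it == 0) { // stand-in for "no uses and no side effects"
            it = values.erase(it); // erase() returns the element after the erased one
        }
    }
}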
#include +#include #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -50,9 +51,50 @@ static void ValidateUses(const IR::Program& program) { } } +static void ValidateForwardDeclarations(const IR::Program& program) { + std::set definitions; + for (const IR::Block* const block : program.blocks) { + for (const IR::Inst& inst : *block) { + definitions.emplace(&inst); + if (inst.GetOpcode() == IR::Opcode::Phi) { + // Phi nodes can have forward declarations + continue; + } + const size_t num_args{inst.NumArgs()}; + for (size_t arg = 0; arg < num_args; ++arg) { + if (inst.Arg(arg).IsImmediate()) { + continue; + } + if (!definitions.contains(inst.Arg(arg).Inst())) { + fmt::print("{}\n", IR::DumpBlock(*block)); + throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block)); + } + } + } + } +} + +static void ValidatePhiNodes(const IR::Program& program) { + for (const IR::Block* const block : program.blocks) { + bool no_more_phis{false}; + for (const IR::Inst& inst : *block) { + if (inst.GetOpcode() == IR::Opcode::Phi) { + if (no_more_phis) { + fmt::print("{}\n", IR::DumpBlock(*block)); + throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); + } + } else { + no_more_phis = true; + } + } + } +} + void VerificationPass(const IR::Program& program) { ValidateTypes(program); ValidateUses(program); + ValidateForwardDeclarations(program); + ValidatePhiNodes(program); } } // namespace Shader::Optimization From fe25f42403493dc4b5e801f78d7f4ec5372aa538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:35:15 -0300 Subject: [PATCH 287/770] shader: Remove identity removal pass for better build times --- src/shader_recompiler/frontend/maxwell/program.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 14180dcd9..aee96eae3 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -143,7 +143,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Thu, 22 Apr 2021 20:32:38 -0300 Subject: [PATCH 288/770] shader: Increase the maximum number of storage buffers Compute shaders spill uniform buffers on storage buffers, increasing the expected number. 
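Concretely, a stage reserves up to MAX_CBUFS constant buffer slots, so a compute shader whose constant buffers are all redirected to storage bindings can exceed the old cap of 16 before counting the storage buffers it declares itself. A compile-time restatement of the new headroom, as a sketch rather than code from the patch:

#include <cstddef>

constexpr std::size_t MAX_CBUFS = 18; // mirrors the value in Shader::Info below
constexpr std::size_t MAX_SSBOS = 32; // the raised limit
static_assert(MAX_SSBOS >= MAX_CBUFS, "spilled constant buffers must fit in the SSBO limit");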
--- src/shader_recompiler/shader_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0f28ae07b..cb1969b3a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -107,7 +107,7 @@ using ImageDescriptors = boost::container::small_vector; struct Info { static constexpr size_t MAX_CBUFS{18}; - static constexpr size_t MAX_SSBOS{16}; + static constexpr size_t MAX_SSBOS{32}; bool uses_workgroup_id{}; bool uses_local_invocation_id{}; From 7a1f296cda32bdb8996f25fd1862b422ac2bfe48 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 21:05:10 -0300 Subject: [PATCH 289/770] shader: Fix render targets with null attachments --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 18 +++++--- .../renderer_vulkan/vk_render_pass_cache.cpp | 42 +++++++++---------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b7688aef9..e43db280f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -105,6 +105,17 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); return key; } + +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -418,17 +429,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const size_t num_attachments{NumAttachments(state)}; + for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto format{static_cast(state.color_formats[index])}; - if (format == Tegra::RenderTargetFormat::NONE) { - continue; - } const auto& blend{state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 991afe521..451ffe019 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -16,18 +16,6 @@ namespace Vulkan { namespace { using VideoCore::Surface::PixelFormat; -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, VkSampleCountFlagBits 
samples) { using MaxwellToVK::SurfaceFormat; @@ -54,17 +42,29 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; + std::array references{}; + u32 num_attachments{}; + u32 num_colors{}; for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; - if (format == PixelFormat::Invalid) { - continue; + const bool is_valid{format != PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast(index + 1); + ++num_colors; } - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); } - const size_t num_colors{descriptions.size()}; - const VkAttachmentReference* depth_attachment{}; + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; if (key.depth_format != PixelFormat::Invalid) { - depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); } const VkSubpassDescription subpass{ @@ -72,10 +72,10 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputAttachmentCount = 0, .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, + .pDepthStencilAttachment = has_depth ? 
&depth_reference : nullptr, .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; From 2dc86372c76afb134651499452bb5074b6d1e839 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Fri, 23 Apr 2021 02:38:02 -0300 Subject: [PATCH 290/770] shader: Fix bugs and build issues on GCC --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6e9f66262..6611c1de3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,7 +95,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; index < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e43db280f..a8b402253 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -226,7 +226,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4415558bb..ebe073293 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,10 +4,10 @@ #pragma once +#include #include #include #include -#include #include #include "common/common_types.h" From d2b54c6e42853866bd88e35313a9fce4eccac03c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:09:02 -0300 Subject: [PATCH 291/770] shader: Fix VMNMX selector B --- .../frontend/maxwell/translate/impl/video_set_predicate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index ec5e74f6d..1b66abc33 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,13 +69,14 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? 
ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; + const u32 b_selector{static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; const bool src_a_signed{vsetp.src_a_sign != 0}; const bool src_b_signed{vsetp.src_b_sign != 0}; const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; - const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, a_selector, src_b_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; // Compare operation's sign is only dependent on operand b's sign const bool compare_signed{src_b_signed}; From 57464e3b72ac31fdfe3b0824c0a9fe02e2805cb8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:32:56 -0300 Subject: [PATCH 292/770] shader: Fix storage type when reading patches on tess control --- .../backend/spirv/emit_spirv_context_get_set.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 5cc9d0d39..29da2ef70 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -303,7 +303,8 @@ Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { } const u32 index{IR::GenericPatchIndex(patch)}; const Id element{ctx.Const(IR::GenericPatchElement(patch))}; - const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; + const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32}; + const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; return ctx.OpLoad(ctx.F32[1], pointer); } From 5b1b06f11e4520ec9d0b7864dc822daea3e3be0c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:33:21 -0300 Subject: [PATCH 293/770] vk_graphics_pipeline: Guard against non-tessellation pipelines using patches --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a8b402253..2bc1f67ae 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -345,12 +345,18 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .topology = input_assembly_topology, .primitiveRestartEnable = state.primitive_restart_enable != 0 && 
SupportsPrimitiveRestart(input_assembly_topology), }; From 0c0ee9d8973abd7d1649df7a6b6f57b3a5570dfe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:39:00 -0300 Subject: [PATCH 294/770] vulkan_device: Require shaderClipDistance and shaderCullDistance features --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9c609e504..2318c1bda 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -249,8 +249,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, + .shaderClipDistance = true, + .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, .shaderInt16 = true, @@ -684,6 +684,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(variable_pointers.variablePointers, "variablePointers"), std::make_pair(variable_pointers.variablePointersStorageBuffer, From 50eb03382e8ac8eb4aeb7cdc488a7ee097fec39d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 08:17:53 -0300 Subject: [PATCH 295/770] shader: Fix error checking in bitfieldExtract and implement bitfieldInsert folding --- .../ir_opt/constant_propagation_pass.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 770d3de61..f16c5e8f6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -553,7 +553,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return; case IR::Opcode::BitFieldUExtract: FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { - if (static_cast(shift) + static_cast(count) > Common::BitSize()) { + if (static_cast(shift) + static_cast(count) > 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } @@ -563,13 +563,22 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::BitFieldSExtract: FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { const size_t back_shift{static_cast(shift) + static_cast(count)}; - if (back_shift > Common::BitSize()) { + const size_t left_shift{32 - back_shift}; + const size_t right_shift{static_cast(32 - count)}; + if (back_shift >= 32 || left_shift >= 32 || right_shift >= 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } - const size_t left_shift{Common::BitSize() - back_shift}; - return static_cast(static_cast(base << left_shift) >> - static_cast(Common::BitSize() - count)); + return static_cast((base << left_shift) >> right_shift); 
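The signed extract fold above works by shifting the requested field up to the top of the 32-bit word and arithmetic-shifting it back down, so the field's highest bit becomes the word's sign bit. A standalone sketch under the same preconditions the pass enforces with LogicError (count >= 1 and shift + count <= 32, assuming C++20 two's-complement shift semantics):

#include <cstdint>

constexpr std::int32_t SignedExtract(std::uint32_t base, std::uint32_t shift,
                                     std::uint32_t count) {
    const std::uint32_t left_shift = 32 - (shift + count);
    const std::uint32_t right_shift = 32 - count;
    return static_cast<std::int32_t>(base << left_shift) >> right_shift;
}

static_assert(SignedExtract(0x80u, 7, 1) == -1); // a 1-bit field holding a set bit reads as -1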
+ }); + return; + case IR::Opcode::BitFieldInsert: + FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { + if (bits >= 32 || offset >= 32) { + throw LogicError("Undefined result in {}({}, {}, {}, {})", + IR::Opcode::BitFieldInsert, base, insert, offset, bits); + } + return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; case IR::Opcode::BranchConditional: From 8fda599a316b7b3a5e017cb01db1e9c021ce7654 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 21:24:30 -0300 Subject: [PATCH 296/770] vk_compute_pipeline: Fix index comparison oversight on compute texture buffers --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6611c1de3..990ead575 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 i = 0; index < desc.count; ++i) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; From 20e86fd61512626e267824c1a5469084c2d36c7a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 24 Apr 2021 00:49:14 -0400 Subject: [PATCH 297/770] shader: Fix BFE s32 undefined check Our unit tests were hitting this exception. --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f16c5e8f6..b1c45d13a 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -565,7 +565,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { const size_t back_shift{static_cast(shift) + static_cast(count)}; const size_t left_shift{32 - back_shift}; const size_t right_shift{static_cast(32 - count)}; - if (back_shift >= 32 || left_shift >= 32 || right_shift >= 32) { + if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, base, shift, count); } From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: [PATCH 298/770] shader: Accelerate pipeline transitions and use dirty flags for shaders --- src/video_core/dirty_flags.cpp | 6 +++ src/video_core/dirty_flags.h | 2 + .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_state_tracker.cpp | 6 --- .../renderer_opengl/gl_state_tracker.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 ++++++++-------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 ++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++--- .../renderer_vulkan/vk_pipeline_cache.h | 30 ++--------- 9 files changed, 114 insertions(+), 64 deletions(-) diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void 
SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3551dbdcc..dd1937863 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, 
texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), 
transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; From 5ed871398b0e89cb3f2e3eb740d431f5faaa12e4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:28:02 -0300 Subject: [PATCH 299/770] vk_graphics_pipeline: Generate specialized pipeline config functions and improve code --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 239 +++++++++++++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 22 +- 2 files changed, 230 insertions(+), 31 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 100a5e07a..674226cb7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,14 +19,24 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" +#ifdef _MSC_VER +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace Vulkan { namespace { using boost::container::small_vector; using boost::container::static_vector; +using Shader::ImageBufferDescriptor; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { @@ -116,6 +126,80 @@ size_t NumAttachments(const FixedPipelineState& state) { } return num; } + +template +bool Passes(const 
std::array& modules, + const std::array& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& modules, + const std::array& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(modules, stage_infos)) { + return FindSpec(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array& modules, + const std::array& infos) { + return FindSpec(modules, infos); +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -144,6 +228,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -155,6 +240,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } else { func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); } void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { @@ -162,26 +248,29 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { transitions.push_back(transition); } -void GraphicsPipeline::Configure(bool is_indexed) { - static constexpr size_t max_images_elements = 64; - std::array image_view_ids; - static_vector image_view_indices; - static_vector samplers; +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + 
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; const auto read_handle{[&](const auto& desc, u32 index) { @@ -207,33 +296,60 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index++] = handle.image; } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + samplers[image_index] = sampler->Handle(); + ++image_index; } } - std::ranges::for_each(info.image_descriptors, add_image); + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; @@ -245,29 +361,75 @@ void GraphicsPipeline::Configure(bool is_indexed) { } }}; const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - 
std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { + buffer_cache.UnbindGraphicsTextureBuffers(stage); + } + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } for (const auto& desc : info.texture_descriptors) { texture_buffer_index += desc.count; } - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); } - buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); +} + +void GraphicsPipeline::ConfigureDraw() { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -550,4 +712,23 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index fd787840b..edab5703f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -75,8 +75,6 @@ public: std::array stages, const std::array& infos); - void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -85,6 +83,10 @@ public: void AddTransition(GraphicsPipeline* transition); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& 
current_key) noexcept { if (key == current_key) { return this; @@ -94,9 +96,23 @@ public: : nullptr; } + template <typename Spec> + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl<Spec>(is_indexed); + }; + } + private: + template <typename Spec> + void ConfigureImpl(bool is_indexed); + + void ConfigureDraw(); + void MakePipeline(const Device& device, VkRenderPass render_pass); + void Validate(); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; @@ -105,6 +121,8 @@ private: VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; + void (*configure_func)(GraphicsPipeline*, bool); + std::vector<GraphicsPipelineCacheKey> transition_keys; std::vector<GraphicsPipeline*> transitions; From 2f3c3dfc10a318f63862c4976f0608ea50c19387 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 25 Apr 2021 00:15:32 -0300 Subject: [PATCH 300/770] vulkan: Rework descriptor allocation algorithm Create multiple descriptor pools on demand. There are some degrees of freedom in what is considered a compatible pool, to avoid wasting large pools on small descriptors. --- src/video_core/renderer_vulkan/blit_image.cpp | 19 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- .../renderer_vulkan/vk_buffer_cache.cpp | 2 +- .../renderer_vulkan/vk_buffer_cache.h | 4 +- .../renderer_vulkan/vk_compute_pass.cpp | 202 +++++++++--------- .../renderer_vulkan/vk_compute_pass.h | 28 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 170 ++++++++++----- .../renderer_vulkan/vk_descriptor_pool.h | 62 ++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.h | 6 +- .../renderer_vulkan/vk_rasterizer.h | 2 +- 15 files changed, 314 insertions(+), 197 deletions(-) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 39fe9289b..4058f62cd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template <u32 num_textures> +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ .uniform_buffers = 0, .storage_buffers = 0, .texture_buffers = 0, .image_buffers = 0, .textures = num_textures, .images = 0, .score = 2, }; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -326,14 +336,16 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), -
two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -415,7 +427,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index cdda56ab1..568993c58 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -116,7 +116,7 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index ea17406dc..c52001b5a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -16,7 +16,7 @@ namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -61,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index ec9866605..e2f3d16bf 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; constexpr size_t ASTC_NUM_BINDINGS = 4; 
-VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast(size), - }; -} +template +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast(size), +}; -std::array BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} - -std::array BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} - -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; -std::array -BuildASTCPassDescriptorUpdateTemplateEntry() { - return {{ +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; + +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + 
.uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; + +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; + +constexpr std::array + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array blocks_dims; @@ -159,14 +170,13 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, - std::span code) { +ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -226,23 +235,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue) { +VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const VkDescriptorSet set = descriptor_allocator->Commit(); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(descriptor_template.address(), set); return set; } -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -277,12 +286,12 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& 
update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -337,14 +346,13 @@ std::pair QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..54c1ac4cb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include @@ -27,13 +26,14 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, std::span code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code); + ~ComputePass(); protected: VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); @@ -44,14 +44,14 @@ protected: private: vk::DescriptorSetLayout descriptor_set_layout; - std::optional descriptor_allocator; + DescriptorAllocator descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +66,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +84,10 @@ private: VKUpdateDescriptorQueue& 
update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 990ead575..54a57c358 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,7 +18,7 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) @@ -30,7 +30,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_set_layout = builder.CreateDescriptorSetLayout(); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8efdc2926..0d4cd37be 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -25,7 +25,7 @@ class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, const Shader::Info& info, vk::ShaderModule spv_module); diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 3bea1ff44..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include "common/common_types.h" @@ -13,77 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 
-constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{&descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector<vk::DescriptorPool> pools; +}; -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.images; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); +template <typename Descriptors> +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} +static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; +} -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array<VkDescriptorPoolSize, 6> pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } }; - - const VkDescriptorPoolCreateInfo ci{ + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast(pool_cursor), .pPoolSizes = std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +} + +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. 
throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 2501f9967..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,21 +4,38 @@ #pragma once +#include +#include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: explicit DescriptorAllocator() = default; - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override = default; DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; @@ -29,36 +46,43 @@ public: VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool* descriptor_pool{}; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); + + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; VkDescriptorSetLayout layout{}; - std::vector descriptors_allocations; + std::vector sets; }; -class VKDescriptorPool final { - friend DescriptorAllocator; - +class 
DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; + + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); private: - vk::DescriptorPool* AllocateNewPool(); - - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector bank_infos; + std::vector> banks; }; } // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 674226cb7..0526c197a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,7 +205,7 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, @@ -220,7 +220,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index edab5703f..454fc049e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,7 +67,7 @@ public: explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2bd870060..9d9729022 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -647,7 +647,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& 
gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ad569acc4..eec17d3fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -75,10 +75,10 @@ namespace Vulkan { class ComputePipeline; class Device; +class DescriptorPool; class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; -class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -105,7 +105,7 @@ public: Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); @@ -147,7 +147,7 @@ private: const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; RenderPassCache& render_pass_cache; BufferCache& buffer_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2f1551e65..1302bed02 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -147,7 +147,7 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; From ac8835659ead30d289ff8b907a2295d87790670f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 25 Apr 2021 01:04:49 -0300 Subject: [PATCH 301/770] vulkan: Defer descriptor set work to the Vulkan thread Move descriptor lookup and update code to a separate thread. Delaying this removes work from the main GPU thread and allows creating descriptor layouts on another thread. This slightly reduces the main thread's workload when new pipelines are encountered.
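The pattern, as a condensed sketch (distilled from the Uint8Pass hunk in this patch; the member names are the ones the diff below touches, nothing new is introduced):

    // Before: the descriptor set was committed and written on the caller's
    // (GPU main) thread, and only the finished set was captured.
    const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
    scheduler.Record([this, set](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
    });

    // After: only the raw update payload is captured; both the allocation
    // (Commit) and the template write happen inside the recorded lambda,
    // i.e. on the Vulkan thread that drains the scheduler.
    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
    scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
        const VkDescriptorSet set = descriptor_allocator.Commit();
        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
    });

The captured pointer has to stay valid until the scheduler consumes the lambda, which is why the payload comes from the update queue's own storage rather than from the caller's stack.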
--- .../renderer_vulkan/vk_compute_pass.cpp | 45 +++++++++---------- .../renderer_vulkan/vk_compute_pass.h | 8 ++-- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 ++++++++------- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 40 ++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 5 ++- .../renderer_vulkan/vk_update_descriptor.cpp | 9 ---- .../renderer_vulkan/vk_update_descriptor.h | 4 +- 8 files changed, 69 insertions(+), 79 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e2f3d16bf..7e5ba283b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -172,11 +172,12 @@ struct AstcPushConstants { }; } // Anonymous namespace -ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code) { + vk::Span push_constants, std::span code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(descriptor_template.address(), set); - return set; -} - Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -265,10 +257,11 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -276,6 +269,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); @@ -321,10 +316,10 @@ std::pair QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 
update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ @@ -333,7 +328,9 @@ std::pair QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), @@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 
uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 54c1ac4cb..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -36,15 +36,14 @@ public: ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - + const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; - -private: vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; + +private: vk::ShaderModule module; }; @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 54a57c358..feaace0c5 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,21 +18,22 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutBuilder builder{device.GetLogical()}; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + auto func{[this, &descriptor_pool] { + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); - descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - scheduler.Record([this](vk::CommandBuffer cmdbuf) { + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - }); - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), 
descriptor_set); - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0d4cd37be..a560e382e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -40,6 +40,7 @@ public: VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: + const Device& device; VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0526c197a..76080bde1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, DescriptorPool& descriptor_pool, + const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); - DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + auto func{[this, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); - MakePipeline(device, render_pass); + MakePipeline(render_pass); std::lock_guard lock{build_mutex}; is_built = true; @@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - if (scheduler.UpdateGraphicsPipeline(this)) { - scheduler.Record([this](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - }); - } - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = key.state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 454fc049e..85e21f611 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -109,19 +109,20 @@ private: void ConfigureDraw(); - void MakePipeline(const Device& device, VkRenderPass render_pass); + void MakePipeline(VkRenderPass render_pass); void Validate(); const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; + const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - void (*configure_func)(GraphicsPipeline*, bool); + void (*configure_func)(GraphicsPipeline*, bool){}; std::vector transition_keys; std::vector transitions; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp 
b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index bea9b8012..ce3427c9b 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() {
     upload_start = payload_cursor;
 }

-void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template,
-                                   VkDescriptorSet set) {
-    const void* const data = upload_start;
-    const vk::Device* const logical = &device.GetLogical();
-    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
-        logical->UpdateDescriptorSet(set, *update_template, data);
-    });
-}
-
 } // namespace Vulkan

diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 82bc9920c..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
     void Acquire();

-    void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set);
+    const DescriptorUpdateEntry* UpdateData() const noexcept {
+        return upload_start;
+    }

     void AddSampledImage(VkImageView image_view, VkSampler sampler) {
         *(payload_cursor++) = VkDescriptorImageInfo{

From 025b20f96ae588777e3ff11083cc4184bf418af6 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 26 Apr 2021 03:53:26 -0300
Subject: [PATCH 302/770] shader: Move pipeline cache logic to separate files

Move code to separate files so that it can be reused from OpenGL. This
greatly simplifies the pipeline cache logic on Vulkan.

Transform feedback state is not yet abstracted; it is still intrusively
stored inside vk_pipeline_cache. It will be moved once OpenGL needs it.
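With the heavy lifting in VideoCommon, a backend's job reduces to deriving from
the shared cache and keying its own pipeline objects on the hashes it returns.
A minimal sketch, assuming the VideoCommon::ShaderCache interface added below
in shader_cache.cpp; the BackendPipeline type and BuildPipeline() hook are
hypothetical stand-ins for what gl_shader_cache and vk_pipeline_cache each
provide:

    class BackendShaderCache : public VideoCommon::ShaderCache {
    public:
        explicit BackendShaderCache(VideoCore::RasterizerInterface& rasterizer,
                                    Tegra::MemoryManager& gpu_memory,
                                    Tegra::Engines::Maxwell3D& maxwell3d,
                                    Tegra::Engines::KeplerCompute& kepler_compute)
            : VideoCommon::ShaderCache{rasterizer, gpu_memory, maxwell3d, kepler_compute} {}

        BackendPipeline* CurrentGraphicsPipeline() {
            std::array<u64, 6> unique_hashes{};
            if (!RefreshStages(unique_hashes)) {
                return nullptr; // a stage points at unmapped GPU memory
            }
            // Backend-specific: look up or build a pipeline keyed on the hashes.
            return BuildPipeline(unique_hashes);
        }
    };

RefreshStages() re-reads Maxwell3D state only when the Shaders dirty flag is
set, and ComputeShader() performs the equivalent lookup for the Kepler compute
engine.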
---
 src/video_core/CMakeLists.txt                 |   3 +
 .../renderer_opengl/gl_rasterizer.h           |   2 +-
 .../renderer_opengl/gl_shader_cache.cpp       |  19 +-
 .../renderer_opengl/gl_shader_cache.h         |  58 +-
 .../renderer_vulkan/vk_graphics_pipeline.h    |   2 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 721 +++---------------
 .../renderer_vulkan/vk_pipeline_cache.h       |  30 +-
 .../renderer_vulkan/vk_rasterizer.cpp         |   2 +-
 src/video_core/shader_cache.cpp               | 233 ++++++
 src/video_core/shader_cache.h                 | 198 ++---
 src/video_core/shader_environment.cpp         | 453 +++++++++++
 src/video_core/shader_environment.h           | 198 +++++
 12 files changed, 1095 insertions(+), 824 deletions(-)
 create mode 100644 src/video_core/shader_cache.cpp
 create mode 100644 src/video_core/shader_environment.cpp
 create mode 100644 src/video_core/shader_environment.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3166a69dc..6e0e4b8f5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -145,7 +145,10 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
+    shader_cache.cpp
     shader_cache.h
+    shader_environment.cpp
+    shader_environment.h
     shader_notify.cpp
     shader_notify.h
     surface.cpp

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f58f8791..2fdcbe4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -217,7 +217,7 @@ private:
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
     BufferCache buffer_cache;
-    ShaderCacheOpenGL shader_cache;
+    ShaderCache shader_cache;
     QueryCache query_cache;
     AccelerateDMA accelerate_dma;
     FenceManagerOpenGL fence_manager;

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4dd166156..c3e490b40 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,18 +29,13 @@ namespace OpenGL {

-Shader::Shader() = default;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_)
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      emu_window{emu_window_}, gpu{gpu_}, device{device_} {}

-Shader::~Shader() = default;
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                     Tegra::Engines::Maxwell3D& maxwell3d_,
-                                     Tegra::Engines::KeplerCompute& kepler_compute_,
-                                     Tegra::MemoryManager& gpu_memory_, const Device& device_)
-    : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
-      maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
+ShaderCache::~ShaderCache() = default;

 } // namespace OpenGL

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index ad3d15a76..96520e17c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -36,27 +36,59 @@ class RasterizerOpenGL;

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-class Shader {
+struct GraphicsProgramKey {
+    struct TransformFeedbackState {
+        struct Layout {
+            u32 stream;
+            u32 varying_count;
+            u32 stride;
+        };
+        std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
+        std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
+    };
+
+    std::array<u64, 6> unique_hashes;
+    std::array color_formats;
+    union {
+        u32 raw;
+        BitField<0, 1, u32> xfb_enabled;
+        BitField<1, 1, u32> early_z;
+        BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+        BitField<6, 2, u32> tessellation_primitive;
+        BitField<8, 2, u32> tessellation_spacing;
+        BitField<10, 1, u32> tessellation_clockwise;
+    };
+    u32 padding;
+    TransformFeedbackState xfb_state;
+
+    [[nodiscard]] size_t Size() const noexcept {
+        if (xfb_enabled != 0) {
+            return sizeof(GraphicsProgramKey);
+        } else {
+            return offsetof(GraphicsProgramKey, padding);
+        }
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
+
+class GraphicsProgram {
 public:
-    explicit Shader();
-    ~Shader();
+
+private:
 };

-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
+class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
-                               Tegra::Engines::Maxwell3D& maxwell3d_,
-                               Tegra::Engines::KeplerCompute& kepler_compute_,
-                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
-    ~ShaderCacheOpenGL() override;
+    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_);
+    ~ShaderCache();

 private:
     Core::Frontend::EmuWindow& emu_window;
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
     const Device& device;
 };

diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85e21f611..e362d13c5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -23,7 +23,7 @@ namespace Vulkan {

 struct GraphicsPipelineCacheKey {
-    std::array<u128, 6> unique_hashes;
+    std::array<u64, 6> unique_hashes;
     FixedPipelineState state;

     size_t Hash() const noexcept;

diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9d9729022..0822862fe 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -11,7 +11,8 @@
 #include "common/bit_cast.h"
 #include "common/cityhash.h"
-#include "common/file_util.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/microprofile.h"
 #include "common/thread_worker.h"
 #include "core/core.h"
@@ -36,6 +37,7 @@
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -43,449 +45,19 @@ namespace Vulkan {

 MICROPROFILE_DECLARE(Vulkan_PipelineCache);

+namespace {
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+
 template <typename Container>
 auto MakeSpan(Container& container) {
     return std::span(container.data(), container.size());
 }

-static u64 MakeCbufKey(u32 index, u32 offset) {
-    return (static_cast<u64>(index) << 32) | offset;
-}
-
-class GenericEnvironment : public Shader::Environment {
-public:
-    explicit GenericEnvironment() = default;
-    explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : gpu_memory{&gpu_memory_}, program_base{program_base_} {
-        start_address = start_address_;
-    }
-
-    ~GenericEnvironment() override = default;
-
-    u32 TextureBoundBuffer() const final {
-        return texture_bound;
-    }
-
-    u32 LocalMemorySize() const final {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const final {
-        return shared_memory_size;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const final {
-        return workgroup_size;
-    }
-
-    u64 ReadInstruction(u32 address) final {
-        read_lowest = std::min(read_lowest, address);
-        read_highest = std::max(read_highest, address);
-
-        if (address >= cached_lowest && address < cached_highest) {
-            return code[(address - cached_lowest) / INST_SIZE];
-        }
-        has_unbound_instructions = true;
-        return gpu_memory->Read<u64>(program_base + address);
-    }
-
-    std::optional<u128> Analyze() {
-        const std::optional<u64> size{TryFindSize()};
-        if (!size) {
-            return std::nullopt;
-        }
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(*size);
-        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
-    }
-
-    void SetCachedSize(size_t size_bytes) {
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(size_bytes);
-        code.resize(CachedSize());
-        gpu_memory->ReadBlock(program_base +
cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(CachedSize())}; - const u64 num_texture_types{static_cast(texture_types.size())}; - const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - - file.write(reinterpret_cast(&code_size), sizeof(code_size)) - .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast(&stage), sizeof(stage)) - .write(reinterpret_cast(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case 
Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; - - u32 read_lowest = std::numeric_limits::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; -using Shader::Maxwell::TranslateProgram; - -// TODO: Move this to a separate file -constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - 
return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - } - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span key, std::span envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast(envs.size())}; - file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } -} - -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast(&code_size), sizeof(code_size)) - .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .read(reinterpret_cast(&start_address), 
sizeof(start_address)) - .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast(&stage), sizeof(stage)); - code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - } - - std::array WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - std::array workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary 
| std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - } else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - std::vector envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, 
+ device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { + const ShaderInfo* const shader{ComputeShader()}; + if (!shader) { return nullptr; } - const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; - if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); - } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { 
- const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; - } - last_valid_shaders = true; - return true; -} + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if 
(device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast(index)}; @@ -871,7 +343,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ 
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            update_descriptor_queue, descriptor_pool),
       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_,
                    buffer_cache_runtime),
-      pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+      pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
                      descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
                      texture_cache),
       query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
       accelerate_dma{ buffer_cache },

diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..b8b8eace5
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,233 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <mutex>
+#include <vector>
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
+
+namespace VideoCommon {
+
+void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
+    std::scoped_lock lock{invalidation_mutex};
+    InvalidatePagesInRegion(addr, size);
+    RemovePendingShaders();
+}
+
+void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+    std::lock_guard lock{invalidation_mutex};
+    InvalidatePagesInRegion(addr, size);
+}
+
+void ShaderCache::SyncGuestHost() {
+    std::scoped_lock lock{invalidation_mutex};
+    RemovePendingShaders();
+}
+
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_)
+    : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+      rasterizer{rasterizer_} {}
+
+bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
+    auto& dirty{maxwell3d.dirty.flags};
+    if (!dirty[VideoCommon::Dirty::Shaders]) {
+        return last_shaders_valid;
+    }
+    dirty[VideoCommon::Dirty::Shaders] = false;
+
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+            unique_hashes[index] = 0;
+            continue;
+        }
+        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+        const GPUVAddr shader_addr{base_addr + shader_config.offset};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        if (!cpu_shader_addr) {
+            LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+            last_shaders_valid = false;
+            return false;
+        }
+        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+        if (!shader_info) {
+            const u32 start_address{shader_config.offset};
+            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
+        }
+        shader_infos[index] = shader_info;
+        unique_hashes[index] = shader_info->unique_hash;
+    }
+    last_shaders_valid = true;
+    return true;
+}
+
+const ShaderInfo* ShaderCache::ComputeShader() {
+    const GPUVAddr
program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; 
+ const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,20 +4,28 @@ #pragma once -#include #include #include #include #include #include -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { -template +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; @@ -25,206 +33,100 @@ class ShaderCache { struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + 
Tegra::Engines::KeplerCompute& kepler_compute_); + + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True on success, false on error + bool RefreshStages(std::array& unique_hashes); + + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); + + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + + std::array shader_infos{}; + bool last_shaders_valid = false; + +private: /// @brief Tries to obtain a cached shader starting at a given address /// @note Doesn't check for ranges, the given address has to be the start of the shader /// @param addr Start address of the shader, ranges are not checked /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; - - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; - } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + ShaderInfo* TryGet(VAddr addr) const; /// @brief Register in the cache a given entry /// @param data Shader to store in the cache /// @param addr Start address of the shader that will be registered /// @param size Size in bytes of the shader - void Register(std::unique_ptr data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + void Register(std::unique_ptr data, VAddr addr, size_t size); - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); - - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } - - storage.push_back(std::move(data)); - - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } - - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} - -private: /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it !=
lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +135,7 @@ private: std::unordered_map> lookup_cache; std::unordered_map> invalidation_cache; - std::vector> storage; + std::vector> storage; std::vector marked_for_removal; }; 
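The devirtualized cache above indexes each shader twice: exactly by start address in lookup_cache, and by 16 KiB page (PAGE_BITS = 14) in invalidation_cache, so a guest write only has to scan the entries of the pages it touches. A minimal standalone sketch of the shared page walk, assuming only the constants visible in the diff above (ForEachPage is a hypothetical helper; the real code inlines this loop in Register and InvalidatePagesInRegion):

    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t PAGE_BITS = 14;
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS; // 16 KiB

    // Visits every page index overlapped by [addr, addr + size).
    // Hypothetical helper, not part of the patch.
    template <typename Func>
    void ForEachPage(std::uint64_t addr, std::size_t size, Func&& func) {
        // Round the end up so a shader that ends mid-page is still
        // indexed under that final page.
        const std::uint64_t page_end = (addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end; ++page) {
            func(page);
        }
    }

Within a candidate page, Entry::Overlaps then filters out entries that share the page but do not intersect the written range.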
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,453 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION = 3; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); +} + +std::optional GenericEnvironment::Analyze() { + const std::optional size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash64(reinterpret_cast(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + 
gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(CachedSize())}; + const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(reinterpret_cast(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } +} + +std::optional GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; + texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& 
maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), 
sizeof(num_cbuf_values)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read cbuf value"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ofstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); + } + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute,
+ Common::UniqueFunction> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,198 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class MemoryManager; +} + +namespace VideoCommon { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ?
image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + + u32 read_lowest = std::numeric_limits::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array WorkgroupSize() const 
override; + +private: + std::unique_ptr code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename); + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { + static_assert(std::is_trivially_copyable_v); + static_assert(std::has_unique_object_representations_v); + SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics); + +} // namespace VideoCommon From c5425b38c1a4d7eae270780d8b3ba66231015038 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 05:18:26 -0300 Subject: [PATCH 303/770] vk_compute_pass: Fix -Wshadow warning --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7e5ba283b..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -238,10 +238,10 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, - StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, From 53acdda772a8b7650c46ba9d998119b8c8e30844 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 22:11:31 -0300 Subject: [PATCH 304/770] vk_scheduler: Allow command submission on worker thread This changes how Scheduler::Flush works. It queues the current command buffer to be sent to the GPU but does not do it immediately. The Vulkan worker thread takes care of that. Users will have to use Scheduler::Flush + Scheduler::WaitWorker to get the previous behavior. Scheduler::Finish is unchanged. To avoid waiting on work never queued, Scheduler::Wait sends the current command buffer if that's what the caller wants to wait on.
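For illustration only, the new contract for a caller that needs results on the CPU looks like the sketch below, assuming nothing beyond the VKScheduler methods shown in the diffs that follow (ReadbackExample is a hypothetical caller; u64 is the usual yuzu integer alias):

    // Sketch of the Flush + WaitWorker pattern described above.
    void ReadbackExample(Vulkan::VKScheduler& scheduler) {
        const u64 tick = scheduler.CurrentTick();
        // Flush now only queues the submission on the worker thread.
        scheduler.Flush();
        // Old Flush behavior: block until the worker has handed the
        // command buffer to the GPU queue.
        scheduler.WaitWorker();
        // Block until the GPU signals the timeline value; per the
        // vk_scheduler.h change below, Wait also flushes when asked to
        // wait on a tick that has not been submitted yet.
        scheduler.Wait(tick);
    }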
--- .../renderer_vulkan/renderer_vulkan.cpp | 79 ++++++----- .../renderer_vulkan/vk_blit_screen.cpp | 90 ++++++------ .../renderer_vulkan/vk_query_cache.cpp | 9 +- .../renderer_vulkan/vk_rasterizer.cpp | 7 +- .../renderer_vulkan/vk_scheduler.cpp | 128 ++++++++++-------- src/video_core/renderer_vulkan/vk_scheduler.h | 15 ++ .../renderer_vulkan/vk_swapchain.cpp | 39 ++---- src/video_core/renderer_vulkan/vk_swapchain.h | 23 ++-- 8 files changed, 204 insertions(+), 186 deletions(-) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..7e39b65bd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -130,35 +125,47 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); - } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; - 
render_window.OnFrameDisplayed(); + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + has_been_recreated = true; + scheduler.WaitWorker(); + } + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.NeedsRecreate() || + swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + recreate_swapchain(); + } + bool needs_recreate; + do { + needs_recreate = false; + swapchain.AcquireNextImage(); + if (swapchain.NeedsRecreate()) { + recreate_swapchain(); + needs_recreate = true; + } + } while (needs_recreate); + if (has_been_recreated) { + blit_screen.Recreate(); + } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); + + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 
0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..1dd78328c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,10 +114,13 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); + auto& scheduler{cache.GetScheduler()}; + if (tick >= scheduler.CurrentTick()) { + scheduler.Flush(); + // This may not be necessary, but it's better to play it 
safe and assume drivers don't + // support wait before signal on vkGetQueryPoolResults + scheduler.WaitWorker(); } - u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa6daeb3a..0f15ad2f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,10 +452,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter = 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 25a4933e5..81cb330d9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,7 +42,7 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } @@ -60,6 +60,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -140,67 +141,20 @@ void VKScheduler::WorkerThread() { if (quit) { continue; } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + while (!chunk_queue.Empty()) { + auto extracted_chunk = std::move(chunk_queue.Front()); + chunk_queue.Pop(); + const bool has_submit = extracted_chunk->HasSubmit(); + extracted_chunk->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); + } + chunk_reserve.Push(std::move(extracted_chunk)); + } } while (!quit); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { - EndPendingOperations(); - InvalidateState(); - WaitWorker(); - - std::unique_lock lock{mutex}; - - current_cmdbuf.End(); - - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 
2U : 1U; - - const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - - master_semaphore->NextTick(); - - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; - - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } -} - -void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; - +void VKScheduler::AllocateWorkerCommandBuffer() { current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); current_cmdbuf.Begin({ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, @@ -208,7 +162,61 @@ void VKScheduler::AllocateNewContext() { .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, .pInheritanceInfo = nullptr, }); +} +void VKScheduler::SubmitExecution(VkSemaphore semaphore) { + EndPendingOperations(); + InvalidateState(); + + const u64 signal_value = master_semaphore->CurrentTick(); + master_semaphore->NextTick(); + + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); + + const u32 num_signal_semaphores = semaphore ? 2U : 1U; + + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } + }); + chunk->MarkSubmit(); + DispatchWork(); +} + +void VKScheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. 
if (query_cache) { query_cache->UpdateCounters(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index a40bb8bcd..40215c4c5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -86,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -155,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array data{}; }; @@ -176,6 +189,8 @@ private: void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..a71b0b01e 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + needs_recreate = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +84,20 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { +void VKSwapchain::AcquireNextImage() { const VkResult result = device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], {}, &image_index); + needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +108,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +115,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + needs_recreate = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index 
+ 1) % static_cast(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, @@ -148,7 +140,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +160,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array queue_indices{graphics_family, present_family}; @@ -178,7 +168,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +184,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..b38fd9dc2 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,20 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. - bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { + return extent.width != width || extent.height != height || current_srgb != is_srgb; + } + + /// Returns true when the image has to be recreated. 
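This patch replaces the old bool-returning AcquireNextImage()/Present() contract with a sticky needs_recreate flag that the caller polls once per frame. As a minimal sketch of the intended consumption (DrawFrame and RenderScene are hypothetical names for the caller's side, not part of the patch):

    VkSemaphore RenderScene();  // Hypothetical: records and submits this frame's work

    void DrawFrame(VKSwapchain& swapchain, u32 width, u32 height, bool srgb) {
        if (swapchain.NeedsRecreate() || swapchain.HasDifferentLayout(width, height, srgb)) {
            swapchain.Create(width, height, srgb);  // Create() clears needs_recreate
        }
        swapchain.AcquireNextImage();      // Sets needs_recreate on acquire failure
        swapchain.Present(RenderScene());  // Sets needs_recreate when out of date
    }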
+ bool NeedsRecreate() const { + return needs_recreate; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +67,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +94,8 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool needs_recreate{}; }; } // namespace Vulkan From fb14820c86f082f970183c2722c5c38bcbb5a2ab Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 27 Apr 2021 21:05:41 -0400 Subject: [PATCH 305/770] shader: Fix IADD3.CC --- .../translate/impl/integer_add_three_input.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 15da90365..259a6e6ac 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -73,21 +73,13 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; - IR::U1 of_1; - if (iadd3.cc != 0) { - of_1 = v.ir.GetOverflowFromOp(lhs); - } + IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; - lhs = v.ir.IAdd(lhs, carry); + lhs_1 = v.ir.IAdd(lhs_1, carry); } - if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; - of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); - } - lhs = IntegerShift(v.ir, lhs, iadd3.shift); - const IR::U32 result{v.ir.IAdd(lhs, op_c)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); if (iadd3.cc != 0) { @@ -98,6 +90,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); } } From f4b82b8dd70a57b5a828bcdbecf9aefd1bd240b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 1 May 2021 00:29:31 -0300 Subject: [PATCH 306/770] vk_graphics_pipeline: Fix texture buffer descriptors --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 76080bde1..9f5d30fe8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -253,6 +253,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; + size_t sampler_index{}; size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -312,11 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle 
handle{read_handle(desc, index)}; - image_view_indices[image_index] = handle.image; + image_view_indices[image_index++] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers[image_index] = sampler->Handle(); - ++image_index; + samplers[sampler_index++] = sampler->Handle(); } } if constexpr (Spec::has_images) { @@ -360,10 +360,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++texture_buffer_index; } }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + const Shader::Info& info{stage_infos[stage]}; - if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { - buffer_cache.UnbindGraphicsTextureBuffers(stage); - } if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { add_buffer(desc); @@ -443,7 +442,9 @@ void GraphicsPipeline::ConfigureDraw() { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } if (!descriptor_set_layout) { return; } From da936d6ad8cef5418b7644754ee4bcbf7f6125f8 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 19:10:55 +0200 Subject: [PATCH 307/770] shader: Implement delegation of Exit to dispatcher on CFG --- .../frontend/maxwell/control_flow.cpp | 41 ++++++++++++++++++- .../frontend/maxwell/control_flow.h | 9 +++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 9811183f1..298faa03e 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -185,8 +185,20 @@ Function::Function(ObjectPool& block_pool, Location start_address) label.block->branch_false = nullptr; } -CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_}, program_start{start_address} { +CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -196,6 +208,12 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre AnalyzeLabel(function_id, label); } } + if (exits_to_dispatcher) { + const auto it = functions[0].blocks.rbegin(); + dispatch_block->begin = it->end + 1; + dispatch_block->end = it->end + 1; + functions[0].blocks.insert(*dispatch_block); + } } void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { @@ -462,11 +480,22 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati // EXIT will never be taken return AnalysisState::Continue; } + if 
(exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function."); + } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->branch_true = dispatch_block; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } @@ -477,6 +506,14 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 89966b16a..0e515c3b6 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -111,7 +111,8 @@ class CFG { }; public: - explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address, + bool exits_to_dispatcher = false); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -128,6 +129,10 @@ public: return std::span(functions.data(), functions.size()); } + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + private: void AnalyzeLabel(FunctionId function_id, Label& label); @@ -158,6 +163,8 @@ private: boost::container::small_vector functions; FunctionId current_function_id{0}; Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; }; } // namespace Shader::Maxwell::Flow From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: [PATCH 308/770] shader: Implement VertexA stage --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/program.cpp | 28 +++++++ .../frontend/maxwell/program.h | 2 + .../ir_opt/dual_vertex_pass.cpp | 74 +++++++++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 26 +++++++ src/shader_recompiler/ir_opt/passes.h | 7 ++ src/shader_recompiler/ir_opt/texture_pass.cpp | 21 ++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 17 ++++- 12 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 051e5d05a..151733090 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp + 
ir_opt/dual_vertex_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/lower_fp16_to_fp32.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4562db45b..c352bbd84 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 335603f88..d3a1db340 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + void EmitUnreachable(EmitContext& ctx) { ctx.OpUnreachable(); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5c1b02d53..dba902186 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Join: case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8f32c9e74..b14719c51 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Join, Void, ) OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aee96eae3..59897cb3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg); +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..f35c6478a --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
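Before the pass bodies, a rough sketch of how the three passes in this new file are meant to compose with the MergeDualVertexPrograms entry point declared in program.h above. This is an illustration of the intended flow only, under stated assumptions (IR::Program::blocks behaves like a std::vector of IR::Block*); it is not the verbatim implementation and omits the descriptor-info merging (JoinTextureInfo/JoinStorageInfo) declared later in this patch:

    // Illustrative composition sketch, not the actual MergeDualVertexPrograms body.
    IR::Program MergeSketch(IR::Program& vertex_a, IR::Program& vertex_b) {
        Optimization::VertexATransformPass(vertex_a);  // Return -> Join, Epilogue dropped
        Optimization::VertexBTransformPass(vertex_b);  // Prologue dropped
        IR::Program result;
        result.blocks.insert(result.blocks.end(), vertex_a.blocks.begin(), vertex_a.blocks.end());
        result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end());
        Optimization::DualVertexJoinPass(result);      // The single Join becomes a Branch
        return result;
    }

The join rewrite relies on VertexATransformPass having produced exactly one Join, which DualVertexJoinPass then turns into a fallthrough branch into VertexB's entry block.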
+ +#include +#include +#include +#include + +#include "common/bit_cast.h" +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void VertexATransformPass(IR::Program& program) { + bool replaced_join{}; + bool eliminated_epilogue{}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::Return: + inst.ReplaceOpcode(IR::Opcode::Join); + replaced_join = true; + break; + case IR::Opcode::Epilogue: + inst.Invalidate(); + eliminated_epilogue = true; + break; + default: + break; + } + if (replaced_join && eliminated_epilogue) { + return; + } + } + } +} + +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } +} + +void DualVertexJoinPass(IR::Program& program) { + const auto& blocks = program.blocks; + s64 s = static_cast(blocks.size()) - 1; + if (s < 1) { + throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + } + for (s64 index = 0; index < s; index++) { + IR::Block* const current_block = blocks[index]; + IR::Block* const next_block = blocks[index + 1]; + for (IR::Inst& inst : current_block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Join) { + IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Branch(next_block); + inst.Invalidate(); + // only 1 join should exist + return; + } + } + } + throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 87eca2a0d..1d11a00d8 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { } } +template +static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast(descriptors.size()) - 1; +} + +void JoinStorageInfo(Info& base, Info& source) { + auto& descriptors = base.storage_buffers_descriptors; + for (auto& desc : source.storage_buffers_descriptors) { + auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; + })}; + if (it != descriptors.end()) { + it->is_written |= desc.is_written; + continue; + } + descriptors.push_back(desc); + } +} + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 186104713..e9cb8546a 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void 
VerificationPass(const IR::Program& program); +// Dual Vertex +void VertexATransformPass(IR::Program& program); +void VertexBTransformPass(IR::Program& program); +void DualVertexJoinPass(IR::Program& program); +void JoinTextureInfo(Info& base, Info& source); +void JoinStorageInfo(Info& base, Info& source); + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index cfa6b34b9..2b38bcf42 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) { } } +void JoinTextureInfo(Info& base, Info& source) { + Descriptors descriptors{ + base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.image_descriptors, + }; + for (auto& desc : source.texture_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.texture_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_descriptors) { + descriptors.Add(desc); + } +} + } // namespace Shader::Optimization diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0822862fe..638475251 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -287,22 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; + bool uses_vertex_a{}; + std::size_t start_value_processing{}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } + uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + continue; + } + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: [PATCH 309/770] shader: Address feedback --- .../frontend/maxwell/control_flow.cpp | 10 ++--- .../frontend/maxwell/program.cpp | 37 +++++++++---------- .../frontend/maxwell/program.h | 1 + .../ir_opt/dual_vertex_pass.cpp | 20 
+++++----- .../renderer_vulkan/vk_pipeline_cache.cpp | 18 ++++----- 5 files changed, 42 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 298faa03e..e7abea82f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -209,9 +209,9 @@ CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_addre } } if (exits_to_dispatcher) { - const auto it = functions[0].blocks.rbegin(); - dispatch_block->begin = it->end + 1; - dispatch_block->end = it->end + 1; + const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; functions[0].blocks.insert(*dispatch_block); } } @@ -481,7 +481,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } if (exits_to_dispatcher && function_id != 0) { - throw NotImplementedException("Dispatch EXIT on external function."); + throw NotImplementedException("Dispatch EXIT on external function"); } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { @@ -490,9 +490,9 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; if (exits_to_dispatcher) { block->end = pc; - block->branch_true = dispatch_block; block->end_class = EndClass::Branch; block->cond = cond; + block->branch_true = dispatch_block; block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); return AnalysisState::Branch; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 59897cb3e..a4fa4319d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -151,31 +151,30 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool #include -#include -#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -40,7 +38,7 @@ void VertexATransformPass(IR::Program& program) { } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Prologue) { return inst.Invalidate(); @@ -51,24 +49,24 @@ void VertexBTransformPass(IR::Program& program) { void DualVertexJoinPass(IR::Program& program) { const auto& blocks = program.blocks; - s64 s = static_cast<s64>(blocks.size()) - 1; - if (s < 1) { - throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + const s64 sub_size = static_cast<s64>(blocks.size()) - 1; + if (sub_size < 1) { + throw LogicError("Dual Vertex Join pass failed, expected at least 2 blocks"); } - for (s64 index = 0; index < s; index++) { - IR::Block* const current_block = blocks[index]; - IR::Block* const next_block = blocks[index + 1]; + for (s64 index = 0; index < sub_size; ++index) { + IR::Block* const current_block{blocks[index]}; + IR::Block* const next_block{blocks[index + 1]}; for (IR::Inst& inst : current_block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Join) { IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; ir.Branch(next_block); inst.Invalidate(); - // only
1 join should exist + // Only 1 join should exist return; } } } - throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); + throw LogicError("Dual Vertex Join pass failed, no join present"); } } // namespace Shader::Optimization diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 638475251..634bbb450 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -288,32 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; - bool uses_vertex_a{}; - std::size_t start_value_processing{}; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } - uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { + // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - continue; + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = uses_vertex_a && uses_vertex_b ? 
1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } From 028f0033bd8d1e2eece7eb6e9b5bf716c739b450 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:43:17 -0300 Subject: [PATCH 310/770] shader: Remove shader util --- src/shader_recompiler/CMakeLists.txt | 6 -- src/shader_recompiler/file_environment.cpp | 50 ------------ src/shader_recompiler/file_environment.h | 25 ------ src/shader_recompiler/main.cpp | 95 ---------------------- 4 files changed, 176 deletions(-) delete mode 100644 src/shader_recompiler/file_environment.cpp delete mode 100644 src/shader_recompiler/file_environment.h delete mode 100644 src/shader_recompiler/main.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 151733090..f20031d98 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -22,8 +22,6 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_warp.cpp environment.h exception.h - file_environment.cpp - file_environment.h frontend/ir/attribute.cpp frontend/ir/attribute.h frontend/ir/basic_block.cpp @@ -178,9 +176,6 @@ add_library(shader_recompiler STATIC target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) -add_executable(shader_util main.cpp) -target_link_libraries(shader_util PRIVATE shader_recompiler) - if (MSVC) target_compile_options(shader_recompiler PRIVATE /W4 @@ -213,4 +208,3 @@ else() endif() create_target_directory_groups(shader_recompiler) -create_target_directory_groups(shader_util) diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp deleted file mode 100644 index f2104f444..000000000 --- a/src/shader_recompiler/file_environment.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include - -#include "exception.h" -#include "file_environment.h" - -namespace Shader { - -FileEnvironment::FileEnvironment(const char* path) { - std::FILE* const file{std::fopen(path, "rb")}; - if (!file) { - throw RuntimeError("Failed to open file='{}'", path); - } - std::fseek(file, 0, SEEK_END); - const long size{std::ftell(file)}; - std::rewind(file); - if (size % 8 != 0) { - std::fclose(file); - throw RuntimeError("File size={} is not aligned to 8", size); - } - // TODO: Use a unique_ptr to avoid zero-initializing this - const size_t num_inst{static_cast(size) / 8}; - data.resize(num_inst); - if (std::fread(data.data(), 8, num_inst, file) != num_inst) { - std::fclose(file); - throw RuntimeError("Failed to read instructions={} from file='{}'", num_inst, path); - } - std::fclose(file); -} - -FileEnvironment::~FileEnvironment() = default; - -u64 FileEnvironment::ReadInstruction(u32 offset) { - if (offset % 8 != 0) { - throw InvalidArgument("offset={} is not aligned to 8", offset); - } - if (offset / 8 >= static_cast(data.size())) { - throw InvalidArgument("offset={} is out of bounds", offset); - } - return data[offset / 8]; -} - -u32 FileEnvironment::TextureBoundBuffer() const { - throw NotImplementedException("Texture bound buffer serialization"); -} - -std::array FileEnvironment::WorkgroupSize() const { - return {1, 1, 1}; -} - -} // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h deleted file mode 100644 index 71601f8fd..000000000 --- a/src/shader_recompiler/file_environment.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#include "common/common_types.h" -#include "shader_recompiler/environment.h" - -namespace Shader { - -class 
FileEnvironment : public Environment { -public: - explicit FileEnvironment(const char* path); - ~FileEnvironment() override; - - u64 ReadInstruction(u32 offset) override; - - u32 TextureBoundBuffer() const override; - - std::array WorkgroupSize() const override; - -private: - std::vector data; -}; - -} // namespace Shader diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp deleted file mode 100644 index 72565f477..000000000 --- a/src/shader_recompiler/main.cpp +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include - -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/file_environment.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/decode.h" -#include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/translate/translate.h" - -using namespace Shader; -using namespace Shader::Maxwell; - -template -static void ForEachFile(const std::filesystem::path& path, Func&& func) { - std::filesystem::directory_iterator end; - for (std::filesystem::directory_iterator it{path}; it != end; ++it) { - if (std::filesystem::is_directory(*it)) { - ForEachFile(*it, func); - } else { - func(*it); - } - } -} - -void RunDatabase() { - std::vector> map; - ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { - map.emplace_back(std::make_unique(path.string().c_str())); - }); - ObjectPool block_pool; - using namespace std::chrono; - auto t0 = high_resolution_clock::now(); - int N = 1; - int n = 0; - for (int i = 0; i < N; ++i) { - for (auto& env : map) { - ++n; - // fmt::print(stdout, "Decoding {}\n", path.string()); - - const Location start_address{0}; - block_pool.ReleaseContents(); - Flow::CFG cfg{*env, block_pool, start_address}; - // fmt::print(stdout, "{}\n", cfg->Dot()); - // IR::Program program{env, cfg}; - // Optimize(program); - // const std::string code{EmitGLASM(program)}; - } - } - auto t = high_resolution_clock::now(); - fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); -} - -static constexpr Profile PROFILE{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = true, - .support_separate_rounding_mode = true, - .support_fp16_denorm_preserve = true, - .support_fp32_denorm_preserve = true, - .support_fp16_denorm_flush = true, - .support_fp32_denorm_flush = true, -}; - -int main() { - // RunDatabase(); - - ObjectPool flow_block_pool; - ObjectPool inst_pool; - ObjectPool block_pool; - - // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - FileEnvironment env{"D:\\Shaders\\shader.bin"}; - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - flow_block_pool.ReleaseContents(); - Flow::CFG cfg{env, flow_block_pool, 0}; - fmt::print(stdout, "{}\n", cfg.Dot()); - IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; - fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; - std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); - std::fclose(file); - 
std::system("spirv-dis D:\\shader.spv"); -} From a46d91b1efa7136b5a4304ae1f13183b8a579b49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:47:24 -0300 Subject: [PATCH 311/770] shader: Add OpenGL shader profile options --- src/shader_recompiler/profile.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 08242184f..12699511a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -61,6 +61,9 @@ struct Profile { u32 supported_spirv{0x00010000}; bool unified_descriptor_binding{}; + bool support_descriptor_aliasing{}; + bool support_int8{}; + bool support_int16{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; @@ -77,11 +80,19 @@ struct Profile { bool support_viewport_index_layer_non_geometry{}; bool support_viewport_mask{}; bool support_typeless_image_loads{}; + bool support_demote_to_helper_invocation{}; bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; + bool lower_left_origin_mode{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + // Offset image operands with an unsigned type do not work + bool has_broken_unsigned_image_offsets{}; + // Signed instructions with unsigned data types are misinterpreted + bool has_broken_signed_operations{}; + // Ignores SPIR-V ordered vs unordered using GLSL semantics + bool ignore_nan_fp_comparisons{}; std::array generic_input_types{}; bool convert_depth_mode{}; From d5d6778ba53b218a7f19c0ab0873404a24ac015b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:52:54 -0300 Subject: [PATCH 312/770] spirv: Desambiguate descriptor names Worksaround a bug on Nvidia's OpenGL SPIR-V compiler where names are used for name matching. --- .../backend/spirv/emit_context.cpp | 46 +++++++++++++++---- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 8e625f8fb..0459c3925 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -222,13 +222,39 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +std::string_view StageName(Stage stage) { + switch (stage) { + case Stage::VertexA: + return "vs_a"; + case Stage::VertexB: + return "vs"; + case Stage::TessellationControl: + return "tcs"; + case Stage::TessellationEval: + return "tes"; + case Stage::Geometry: + return "gs"; + case Stage::Fragment: + return "fs"; + case Stage::Compute: + return "cs"; + } + throw InvalidArgument("Invalid stage {}", stage); +} + +template +void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... 
args) { + ctx.Name(object, + fmt::format(format_str, StageName(ctx.stage), std::forward(args)...).c_str()); +} + void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size) { const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{ctx.TypeStruct(array_type)}; - ctx.Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + Name(ctx, struct_type, "{}_cbuf_block_{}{}", ctx.stage, type_char, element_size * CHAR_BIT); ctx.Decorate(struct_type, spv::Decoration::Block); ctx.MemberName(struct_type, 0, "data"); ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); @@ -382,11 +408,13 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p } template -std::string NameOf(const Desc& desc, std::string_view prefix) { +std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { if (desc.count > 1) { - return fmt::format("{}{}_{:02x}x{}", prefix, desc.cbuf_index, desc.cbuf_offset, desc.count); + return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset, desc.count); } else { - return fmt::format("{}{}_{:02x}", prefix, desc.cbuf_index, desc.cbuf_offset); + return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, + desc.cbuf_offset); } } @@ -989,7 +1017,7 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, NameOf(desc, "texbuf")); + Name(id, NameOf(stage, desc, "texbuf")); texture_buffers.push_back({ .id = id, .count = desc.count, @@ -1008,12 +1036,12 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { throw NotImplementedException("Array of image buffers"); } const spv::ImageFormat format{GetImageFormat(desc.format)}; - const Id image_type{TypeImage(U32[4], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, NameOf(desc, "imgbuf")); + Name(id, NameOf(stage, desc, "imgbuf")); image_buffers.push_back({ .id = id, .image_type = image_type, @@ -1036,7 +1064,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, NameOf(desc, "tex")); + Name(id, NameOf(stage, desc, "tex")); textures.push_back({ .id = id, .sampled_type = sampled_type, @@ -1062,7 +1090,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, NameOf(desc, "img")); + Name(id, NameOf(stage, desc, "img")); images.push_back({ .id = id, .image_type = image_type, From 48a17298d76cd8ed3bf2b53aca1e1ac097693976 Mon 
Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:58:11 -0300 Subject: [PATCH 313/770] spirv: Support OpenGL uniform buffers and change bindings --- .../backend/spirv/emit_context.cpp | 102 +++++++++++------- .../backend/spirv/emit_context.h | 15 ++- .../backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/emit_spirv.h | 2 +- .../spirv/emit_spirv_context_get_set.cpp | 98 ++++++++++++++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 7 +- 6 files changed, 168 insertions(+), 58 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0459c3925..0eb400223 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -441,8 +441,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& binding) : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + const bool is_unified{profile.unified_descriptor_binding}; + u32& uniform_binding{is_unified ? binding.unified : binding.uniform_buffer}; + u32& storage_binding{is_unified ? binding.unified : binding.storage_buffer}; + u32& texture_binding{is_unified ? binding.unified : binding.texture}; + u32& image_binding{is_unified ? binding.unified : binding.image}; AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); @@ -450,12 +455,12 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineLocalMemory(program); DefineSharedMemory(program); DefineSharedMemoryFunctions(program); - DefineConstantBuffers(program.info, binding); - DefineStorageBuffers(program.info, binding); - DefineTextureBuffers(program.info, binding); - DefineImageBuffers(program.info, binding); - DefineTextures(program.info, binding); - DefineImages(program.info, binding); + DefineConstantBuffers(program.info, uniform_binding); + DefineStorageBuffers(program.info, storage_binding); + DefineTextureBuffers(program.info, texture_binding); + DefineImageBuffers(program.info, image_binding); + DefineTextures(program.info, texture_binding); + DefineImages(program.info, image_binding); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); DefineLabels(program); @@ -489,6 +494,20 @@ Id EmitContext::Def(const IR::Value& value) { } } +Id EmitContext::BitOffset8(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const((offset.U32() % 4) * 8); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); +} + +Id EmitContext::BitOffset16(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const(((offset.U32() / 2) % 2) * 16); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -496,6 +515,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + S32.Define(*this, TypeInt(32, true), "s32"); private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); @@ -889,28 +909,36 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } 
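When descriptor aliasing is unsupported, the fallback declared in the hunk below binds each constant buffer once as an array of uvec4 rows, and the EmitGetCbuf* helpers later in this patch extract narrower values with GetCbufElement plus BitOffset8/BitOffset16. As a host-side C++ model of the arithmetic the emitted SPIR-V performs for an 8-bit load (illustrative only, not code from the patch):

    #include <cstdint>
    #include <span>

    using u32 = std::uint32_t;  // Stands in for the project's common type alias

    // Models GetCbufU32x4 + GetCbufElement + BitOffset8 for EmitGetCbufU8.
    u32 LoadCbufU8(std::span<const u32> cbuf_words, u32 byte_offset) {
        const u32 row = byte_offset / 16;           // uvec4 row from the access chain
        const u32 element = (byte_offset / 4) % 4;  // GetCbufElement
        const u32 shift = (byte_offset % 4) * 8;    // BitOffset8
        return (cbuf_words[row * 4 + element] >> shift) & 0xff;
    }

For example, byte_offset = 13 selects row 0, element 3 and extracts bits [8, 16) of that word.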
- if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); - } - if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); - DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', - sizeof(u32)); - } - if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', - sizeof(f32)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', - sizeof(u32[2])); - } - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - binding += desc.count; + if (profile.support_descriptor_aliasing) { + if (True(info.used_constant_buffer_types & IR::Type::U8)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } + if (True(info.used_constant_buffer_types & IR::Type::U16)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(info.used_constant_buffer_types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast(info.constant_buffer_descriptors.size()); + } else { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', + sizeof(u32[4])); + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } } } @@ -920,35 +948,37 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } AddExtension("SPV_KHR_storage_buffer_storage_class"); - if (True(info.used_storage_buffer_types & IR::Type::U8)) { + const IR::Type used_types{profile.support_descriptor_aliasing ? 
info.used_storage_buffer_types + : IR::Type::U32}; + if (True(used_types & IR::Type::U8)) { DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, sizeof(u8)); DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, sizeof(u8)); } - if (True(info.used_storage_buffer_types & IR::Type::U16)) { + if (True(used_types & IR::Type::U16)) { DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, sizeof(u16)); DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, sizeof(u16)); } - if (True(info.used_storage_buffer_types & IR::Type::U32)) { + if (True(used_types & IR::Type::U32)) { DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], sizeof(u32)); } - if (True(info.used_storage_buffer_types & IR::Type::F32)) { + if (True(used_types & IR::Type::F32)) { DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], sizeof(f32)); } - if (True(info.used_storage_buffer_types & IR::Type::U64)) { + if (True(used_types & IR::Type::U64)) { DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, sizeof(u64)); } - if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + if (True(used_types & IR::Type::U32x2)) { DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], sizeof(u32[2])); } - if (True(info.used_storage_buffer_types & IR::Type::U32x4)) { + if (True(used_types & IR::Type::U32x4)) { DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], sizeof(u32[4])); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c52544fb7..baf12c217 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -17,6 +17,14 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); @@ -62,6 +70,7 @@ struct UniformDefinitions { Id U32{}; Id F32{}; Id U32x2{}; + Id U32x4{}; }; struct StorageTypeDefinition { @@ -101,11 +110,14 @@ struct GenericElementInfo { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); + explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); + [[nodiscard]] Id BitOffset8(const IR::Value& offset); + [[nodiscard]] Id BitOffset16(const IR::Value& offset); + Id Const(u32 value) { return Constant(U32[1], value); } @@ -139,6 +151,7 @@ public: Id U64{}; VectorTypes F32; VectorTypes U32; + VectorTypes S32; VectorTypes F16; VectorTypes F64; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 815b3cd95..0cb075670 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -368,7 +368,7 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding) { EmitContext ctx{profile, program, 
binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index c352bbd84..8f6482b7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -14,7 +14,7 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, - u32& binding); + Bindings& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 29da2ef70..ef32184ea 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -121,7 +121,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, - const IR::Value& binding, const IR::Value& offset) { + const IR::Value& binding, const IR::Value& offset, bool check_alignment = true) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -137,13 +137,31 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; return ctx.OpLoad(result_type, access_chain); } - if (offset.U32() % element_size != 0) { + if (check_alignment && offset.U32() % element_size != 0) { throw NotImplementedException("Unaligned immediate constant buffer load"); } const Id imm_offset{ctx.Const(offset.U32() / element_size)}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; return ctx.OpLoad(result_type, access_chain); } + +Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset, + false); +} + +Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { + if (offset.IsImmediate()) { + const u32 element{(offset.U32() / 4) % 4 + index_offset}; + return ctx.OpCompositeExtract(ctx.U32[1], vector, element); + } + const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; + if (index_offset > 0) { + element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); + } + return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); +} } // Anonymous namespace void EmitGetRegister(EmitContext&) { @@ -179,40 +197,86 @@ void EmitGetIndirectBranchVariable(EmitContext&) { } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; - return ctx.OpUConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + } } Id 
EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; - return ctx.OpSConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + } } Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; - return ctx.OpUConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{ + GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + } } Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; - return ctx.OpSConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{ + GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + } } Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return GetCbufElement(ctx, vector, offset, 0u); + } } Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); + } } Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, + offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), + GetCbufElement(ctx, vector, offset, 1u)); + } } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { 
const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Const(element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; @@ -221,7 +285,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.Const(0.0f); } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } @@ -232,8 +296,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad( - ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(element))); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 634bbb450..1334882b5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -127,6 +127,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw base_profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == @@ -149,9 +150,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device.IsExtShaderViewportIndexLayerSupported(), .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, .generic_input_types{}, .fixed_state_point_size{}, .alpha_test_func{}, @@ -312,7 +315,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 
1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -398,7 +401,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; const std::vector code{EmitSPIRV(base_profile, program, binding)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; From 7b03b9711815d0c4c39bb26f83ada9f6957bb269 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:08:58 -0300 Subject: [PATCH 314/770] spirv: Implement int8 and int16 conversion fallbacks --- .../backend/spirv/emit_spirv_convert.cpp | 99 +++++++++++++++---- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 757165626..acb8957fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -5,17 +5,62 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id ExtractU16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U16, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractS16(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.S16, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); + } +} + +Id ExtractU8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U8, value); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} + +Id ExtractS8(EmitContext& ctx, Id value) { + if (ctx.profile.support_int8) { + return ctx.OpSConvert(ctx.S8, value); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); + } +} +} // Anonymous namespace Id EmitConvertS16F16(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS16F32(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS16F64(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + } else { + return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); + } } Id EmitConvertS32F16(EmitContext& ctx, Id value) { @@ -23,7 +68,11 @@ Id EmitConvertS32F16(EmitContext& ctx, Id value) { } Id EmitConvertS32F32(EmitContext& ctx, Id value) { - return ctx.OpConvertFToS(ctx.U32[1], value); + if (ctx.profile.has_broken_signed_operations) { + return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); + } else { + return ctx.OpConvertFToS(ctx.U32[1], value); 
+ } } Id EmitConvertS32F64(EmitContext& ctx, Id value) { @@ -43,15 +92,27 @@ Id EmitConvertS64F64(EmitContext& ctx, Id value) { } Id EmitConvertU16F16(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU16F32(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU16F64(EmitContext& ctx, Id value) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + if (ctx.profile.support_int16) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); + } else { + return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); + } } Id EmitConvertU32F16(EmitContext& ctx, Id value) { @@ -103,11 +164,11 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { } Id EmitConvertF16S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F16[1], value); + return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); } Id EmitConvertF16S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F16[1], value); + return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); } Id EmitConvertF16S32(EmitContext& ctx, Id value) { @@ -119,11 +180,11 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) { } Id EmitConvertF16U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F16[1], value); + return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); } Id EmitConvertF16U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F16[1], value); + return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); } Id EmitConvertF16U32(EmitContext& ctx, Id value) { @@ -135,11 +196,11 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) { } Id EmitConvertF32S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); } Id EmitConvertF32S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); } Id EmitConvertF32S32(EmitContext& ctx, Id value) { @@ -151,11 +212,11 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) { } Id EmitConvertF32U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); } Id EmitConvertF32U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); } Id EmitConvertF32U32(EmitContext& ctx, Id value) { @@ -167,11 +228,11 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) { } Id EmitConvertF64S8(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); } Id EmitConvertF64S16(EmitContext& ctx, Id value) { - return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); } Id EmitConvertF64S32(EmitContext& ctx, Id value) { @@ -183,11 +244,11 @@ Id 
EmitConvertF64S64(EmitContext& ctx, Id value) { } Id EmitConvertF64U8(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); + return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); } Id EmitConvertF64U16(EmitContext& ctx, Id value) { - return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); + return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); } Id EmitConvertF64U32(EmitContext& ctx, Id value) { From 33bebc34127305e107d54e0bdd0a6806676abf2f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:12:08 -0300 Subject: [PATCH 315/770] spirv: Add integer clamping workarounds Workaround more bugs on Nvidia's OpenGL SPIR-V compiler. --- .../backend/spirv/emit_spirv_integer.cpp | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index cd5b1f42c..86e6a4f3b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -171,7 +171,13 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) { } Id EmitSMin32(EmitContext& ctx, Id a, Id b) { - return ctx.OpSMin(ctx.U32[1], a, b); + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; + return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; } Id EmitUMin32(EmitContext& ctx, Id a, Id b) { @@ -179,7 +185,13 @@ Id EmitUMin32(EmitContext& ctx, Id a, Id b) { } Id EmitSMax32(EmitContext& ctx, Id a, Id b) { - return ctx.OpSMax(ctx.U32[1], a, b); + const bool is_broken{ctx.profile.has_broken_signed_operations}; + if (is_broken) { + a = ctx.OpBitcast(ctx.S32[1], a); + b = ctx.OpBitcast(ctx.S32[1], b); + } + const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; + return is_broken ? 
ctx.OpBitcast(ctx.U32[1], result) : result; } Id EmitUMax32(EmitContext& ctx, Id a, Id b) { @@ -187,14 +199,32 @@ Id EmitUMax32(EmitContext& ctx, Id a, Id b) { } Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { - const Id result{ctx.OpSClamp(ctx.U32[1], value, min, max)}; + Id result{}; + if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { + value = ctx.OpBitcast(ctx.S32[1], value); + min = ctx.OpBitcast(ctx.S32[1], min); + max = ctx.OpBitcast(ctx.S32[1], max); + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); + } else { + result = ctx.OpSClamp(ctx.S32[1], value, min, max); + } + result = ctx.OpBitcast(ctx.U32[1], result); + } else { + result = ctx.OpSClamp(ctx.U32[1], value, min, max); + } SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); return result; } Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { - const Id result{ctx.OpUClamp(ctx.U32[1], value, min, max)}; + Id result{}; + if (ctx.profile.has_broken_spirv_clamp) { + result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); + } else { + result = ctx.OpUClamp(ctx.U32[1], value, min, max); + } SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); return result; From 4ead714910136dded0f404e184603f17b6810291 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:13:34 -0300 Subject: [PATCH 316/770] spirv: Add int8 and int16 capabilities only when supported --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0eb400223..e9ffe4955 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -526,12 +526,12 @@ void EmitContext::DefineCommonTypes(const Info& info) { output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); - if (info.uses_int8) { + if (info.uses_int8 && profile.support_int8) { AddCapability(spv::Capability::Int8); U8 = Name(TypeInt(8, false), "u8"); S8 = Name(TypeInt(8, true), "s8"); } - if (info.uses_int16) { + if (info.uses_int16 && profile.support_int16) { AddCapability(spv::Capability::Int16); U16 = Name(TypeInt(16, false), "u16"); S16 = Name(TypeInt(16, true), "s16"); From d2e811db2edd3829b344e96ad56ae979bccd28d2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:14:35 -0300 Subject: [PATCH 317/770] spirv: Workaround image unsigned offset bug Workaround bug on Nvidia's OpenGL SPIR-V compiler when using unsigned texture offsets. 
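Texel offsets are signed quantities (sampling one pixel up uses an offset of -1), so emitting them through unsigned constant composites leaves a bit pattern that, read back as unsigned, becomes a huge positive value; that reinterpretation is plausibly the class of miscompilation being worked around here. A standalone illustration in plain C++ (not emulator code):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int32_t offset = -1;                           // a typical negative texel offset
        const uint32_t bits = static_cast<uint32_t>(offset); // same bit pattern, unsigned view
        std::printf("signed: %d, unsigned view: %u\n", offset, bits);
        // prints: signed: -1, unsigned view: 4294967295
        return 0;
    }

Accordingly, the diff below adds an SConst helper family next to the existing Const overloads and emits ConstOffset image operands through it.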
--- .../backend/spirv/emit_context.h | 25 ++++++++++++++++--- .../backend/spirv/emit_spirv_image.cpp | 10 ++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index baf12c217..823ed8525 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -122,10 +122,6 @@ public: return Constant(U32[1], value); } - Id Const(f32 value) { - return Constant(F32[1], value); - } - Id Const(u32 element_1, u32 element_2) { return ConstantComposite(U32[2], Const(element_1), Const(element_2)); } @@ -139,6 +135,27 @@ public: Const(element_4)); } + Id SConst(s32 value) { + return Constant(S32[1], value); + } + + Id SConst(s32 element_1, s32 element_2) { + return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3) { + return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); + } + + Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { + return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), + SConst(element_4)); + } + + Id Const(f32 value) { + return Constant(F32[1], value); + } + const Profile& profile; Stage stage{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6008980af..a6cb67b97 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -104,7 +104,7 @@ private: return; } if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.Const(offset.U32())); + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(offset.U32())); return; } IR::Inst* const inst{offset.InstRecursive()}; @@ -112,16 +112,16 @@ private: switch (inst->GetOpcode()) { case IR::Opcode::CompositeConstructU32x2: Add(spv::ImageOperandsMask::ConstOffset, - ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32())); + ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32())); return; case IR::Opcode::CompositeConstructU32x3: Add(spv::ImageOperandsMask::ConstOffset, - ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32())); + ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32())); return; case IR::Opcode::CompositeConstructU32x4: Add(spv::ImageOperandsMask::ConstOffset, - ctx.Const(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32(), - inst->Arg(3).U32())); + ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32(), + inst->Arg(3).U32())); return; default: break; From bafe9e35a96407b0cddcc8b66316063f0e7f9c76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:15:36 -0300 Subject: [PATCH 318/770] spirv: Only add image operands mask when needed --- .../backend/spirv/emit_spirv_image.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index a6cb67b97..6680cf1b3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -94,6 +94,10 @@ public: return std::span{operands.data(), operands.size()}; } + std::optional MaskOptional() const noexcept { + return mask != spv::ImageOperandsMask{} ? 
std::make_optional(mask) : std::nullopt; + } + spv::ImageOperandsMask Mask() const noexcept { return mask; } @@ -318,7 +322,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& bias_lc, offset); return Emit(&EmitContext::OpImageSparseSampleImplicitLod, &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } else { // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as // if the lod was explicitly zero. This may change on Turing with implicit compute @@ -347,7 +351,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va offset); return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -365,7 +369,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), - operands.Mask(), operands.Span()); + operands.MaskOptional(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -373,7 +377,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, - ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.Mask(), + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); } @@ -385,7 +389,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, info, index), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { From cfd873275d705f124efff6ceae33efc8994e64fa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:18:22 -0300 Subject: [PATCH 319/770] spirv: Use OriginLowerLeft when requested --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0cb075670..10de612cd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -212,7 +212,11 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; - ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + if (ctx.profile.lower_left_origin_mode) { + ctx.AddExecutionMode(main, 
spv::ExecutionMode::OriginLowerLeft); + } else { + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + } if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } From 2b434b74af7be8c6ed2e96970e4e3965e351edbd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:18:55 -0300 Subject: [PATCH 320/770] spirv: Enable DemoteToHelperInvocationEXT only when supported --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 10de612cd..eb192e3c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -296,7 +296,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sparse_residency) { ctx.AddCapability(spv::Capability::SparseResidency); } - if (info.uses_demote_to_helper_invocation) { + if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } From d2a0f9d7ad89184294e4d3f05ae0843e4ff4e6be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:19:27 -0300 Subject: [PATCH 321/770] spirv: Do not enable ShaderLayer This is enabled by an extension instead of the capability. --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index eb192e3c9..745a834e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -300,9 +300,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_layer) { - ctx.AddCapability(spv::Capability::ShaderLayer); - } if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); } From fd913bceafe8a702baf9b91ce8e618c17c965a64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:20:37 -0300 Subject: [PATCH 322/770] spirv: Add OpKill fallback to demote --- .../backend/spirv/emit_spirv_control_flow.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index d3a1db340..b4a6fbb93 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -35,8 +35,12 @@ void EmitUnreachable(EmitContext& ctx) { } void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { - ctx.OpDemoteToHelperInvocationEXT(); - ctx.OpBranch(continue_label); + if (ctx.profile.support_demote_to_helper_invocation) { + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(continue_label); + } else { + ctx.OpKill(); + } } } // namespace Shader::Backend::SPIRV From fde47152d924c8137447bc0df38bc17bf7c15137 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:21:17 -0300 Subject: [PATCH 323/770] spirv: Add SSBO read fallbacks when no aliasing is available --- .../backend/spirv/emit_spirv_memory.cpp | 136 
+++++++++++++----- 1 file changed, 99 insertions(+), 37 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 8849258e3..a6a3f3351 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -8,44 +8,62 @@ namespace Shader::Backend::SPIRV { namespace { -Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, + u32 index_offset = 0) { if (offset.IsImmediate()) { - const u32 imm_offset{static_cast(offset.U32() / element_size)}; + const u32 imm_offset{static_cast(offset.U32() / element_size) + index_offset}; return ctx.Const(imm_offset); } const u32 shift{static_cast(std::countr_zero(element_size))}; - const Id index{ctx.Def(offset)}; - if (shift == 0) { - return index; + Id index{ctx.Def(offset)}; + if (shift != 0) { + const Id shift_id{ctx.Const(shift)}; + index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } - const Id shift_id{ctx.Const(shift)}; - return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); + if (index_offset != 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); + } + return index; } Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, const StorageTypeDefinition& type_def, size_t element_size, - Id StorageDefinitions::*member_ptr) { + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; - const Id index{StorageIndex(ctx, offset, element_size)}; + const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); } Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, const StorageTypeDefinition& type_def, size_t element_size, - Id StorageDefinitions::*member_ptr) { - const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; return ctx.OpLoad(result_type, pointer); } +Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} + void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, const StorageTypeDefinition& type_def, size_t element_size, - Id StorageDefinitions::*member_ptr) { - const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { + const Id pointer{ + StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; ctx.OpStore(pointer, value); } + +void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + u32 index_offset = 0) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32, index_offset); +} } // Anonymous namespace void EmitLoadGlobalU8(EmitContext&) { @@ -105,42 +123,73 @@ void 
EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return ctx.OpUConvert(ctx.U32[1], - LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, - sizeof(u8), &StorageDefinitions::U8)); + if (ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } } Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return ctx.OpSConvert(ctx.U32[1], - LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, - sizeof(s8), &StorageDefinitions::S8)); + if (ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset8(offset), ctx.Const(8u)); + } } Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return ctx.OpUConvert(ctx.U32[1], - LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, - sizeof(u16), &StorageDefinitions::U16)); + if (ctx.profile.support_descriptor_aliasing) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); + } else { + return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } } Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return ctx.OpSConvert(ctx.U32[1], - LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, - sizeof(s16), &StorageDefinitions::S16)); + if (ctx.profile.support_descriptor_aliasing) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); + } else { + return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), + ctx.BitOffset16(offset), ctx.Const(16u)); + } } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), - &StorageDefinitions::U32); + return LoadStorage32(ctx, binding, offset); } Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, sizeof(u32[2]), - &StorageDefinitions::U32x2); + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, + sizeof(u32[2]), &StorageDefinitions::U32x2); + } else { + return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1)); + } } Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, sizeof(u32[4]), - &StorageDefinitions::U32x4); + if (ctx.profile.support_descriptor_aliasing) { + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, + sizeof(u32[4]), &StorageDefinitions::U32x4); + } 
else { + return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), + LoadStorage32(ctx, binding, offset, 1), + LoadStorage32(ctx, binding, offset, 2), + LoadStorage32(ctx, binding, offset, 3)); + } } void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, @@ -169,20 +218,33 @@ void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::V void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), - &StorageDefinitions::U32); + WriteStorage32(ctx, binding, offset, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), - &StorageDefinitions::U32x2); + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); + } else { + for (u32 index = 0; index < 2; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } } void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), - &StorageDefinitions::U32x4); + if (ctx.profile.support_descriptor_aliasing) { + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); + } else { + for (u32 index = 0; index < 4; ++index) { + const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; + WriteStorage32(ctx, binding, offset, element, index); + } + } } } // namespace Shader::Backend::SPIRV From 850b08a16cecf260e7c8e07b81b5e0078622974d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:27:09 -0300 Subject: [PATCH 324/770] spirv: Be aware of NAN unaware drivers --- .../spirv/emit_spirv_floating_point.cpp | 58 +++++++++++++------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 97d11cc63..b3afbef25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -22,6 +22,28 @@ Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { return ctx.OpFClamp(type, value, zero, one); } } + +Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id comp{ctx.OpFOrdEqual(ctx.U1, lhs, rhs)}; + const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; + const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; + return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); + } else { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + } +} + +Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; + const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; + const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); + } else { + return (ctx.*comp_func)(ctx.U1, lhs, rhs); + } +} } // 
Anonymous namespace Id EmitFPAbs16(EmitContext& ctx, Id value) { @@ -227,27 +249,27 @@ Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -275,15 +297,15 @@ Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { @@ -299,15 +321,15 @@ Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -323,15 +345,15 @@ Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -347,15 +369,15 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return 
ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPIsNan16(EmitContext& ctx, Id value) { From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: [PATCH 325/770] shader: Initial OpenGL implementation --- .../frontend/ir/ir_emitter.cpp | 4 + .../frontend/ir/ir_emitter.h | 1 + .../translate/impl/move_special_register.cpp | 7 + src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- .../renderer_opengl/gl_buffer_cache.cpp | 37 ++- .../renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++ .../renderer_opengl/gl_compute_program.h | 83 +++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------ src/video_core/renderer_opengl/gl_device.h | 16 - .../renderer_opengl/gl_graphics_program.cpp | 296 ++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 +++++++ .../renderer_opengl/gl_rasterizer.cpp | 23 +- .../renderer_opengl/gl_shader_cache.cpp | 275 +++++++++++++++- .../renderer_opengl/gl_shader_cache.h | 98 +++--- .../renderer_opengl/gl_shader_manager.cpp | 146 --------- .../renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 +++++---------- .../renderer_opengl/gl_texture_cache.h | 29 +- .../renderer_opengl/maxwell_to_gl.h | 108 +++++++ .../renderer_opengl/renderer_opengl.cpp | 17 +- .../renderer_opengl/renderer_opengl.h | 5 +- .../renderer_opengl/util_shaders.cpp | 13 +- .../renderer_vulkan/pipeline_helper.h | 17 - .../renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- .../renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 + src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 - src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + .../vulkan_common/vulkan_device.cpp | 2 +- 38 files changed, 1427 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } 
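The LocalInvocationId plumbing above feeds the SR_TID case added further below in move_special_register.cpp, which packs all three invocation ID components into a single 32-bit register with two BitFieldInsert calls: x occupies bits [0,16), y bits [16,24), z bits [26,32). A standalone model of that packing, with the field layout taken from the diff (the helper name is ours, not emulator code):

    #include <cstdint>

    // Mirrors BitFieldInsert semantics: y replaces bits [16,24) of the base,
    // z replaces bits [26,32); whatever x had in bits [24,26) survives.
    constexpr uint32_t PackSrTid(uint32_t x, uint32_t y, uint32_t z) {
        uint32_t packed = x;
        packed = (packed & ~(0xffu << 16)) | ((y & 0xffu) << 16);
        packed = (packed & ~(0x3fu << 26)) | ((z & 0x3fu) << 26);
        return packed;
    }

    static_assert(PackSrTid(5, 2, 1) == (5u | (2u << 16) | (1u << 26)), "layout check");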
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool 
IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache
<P>
::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache
<P>
::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache
<P>
::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache
<P>
::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache
<P>
::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache
<P>
::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache
<P>
::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache
<P>
::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache
<P>
::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache
<P>
::BindHostComputeStorageBuffers() { template void BufferCache
<P>
::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache
<P>
::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache
<P>
::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + 
PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool 
NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 
secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ 
b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool HasExtension(std::span extensions, std::string_view 
return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. 
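With the device-level slot partition (BuildBaseBindings/Extract, deleted above) gone, each GraphicsProgram computes its own base bindings in its constructor below: stage N+1 starts where stage N's descriptors end, an exclusive prefix sum over per-stage counts. A minimal self-contained sketch of that computation, assuming five graphics stages:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Sketch: bases[s] is the first binding slot of stage s, i.e. the sum of
    // the descriptor counts of all earlier stages (exclusive prefix sum).
    std::array<std::uint32_t, 5> BaseBindings(const std::array<std::uint32_t, 5>& counts) {
        std::array<std::uint32_t, 5> bases{};
        for (std::size_t stage = 1; stage < bases.size(); ++stage) {
            bases[stage] = bases[stage - 1] + counts[stage - 1];
        }
        return bases;
    }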
- bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr 
(Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : 
info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + 
Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. 
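gl_compute_program.h and gl_graphics_program.h above both close by specializing std::hash so their key types can index std::unordered_map directly. A self-contained sketch of the pattern; MyKey and its trivial Hash() are stand-ins for the CityHash64-based keys in the headers:

    #include <cstddef>
    #include <unordered_map>

    struct MyKey {
        unsigned long long unique_hash;
        bool operator==(const MyKey&) const = default; // memberwise compare
        std::size_t Hash() const noexcept {
            return static_cast<std::size_t>(unique_hash); // stand-in for CityHash64
        }
    };

    // Forward std::hash to the key's own Hash(), as the headers above do.
    template <>
    struct std::hash<MyKey> {
        std::size_t operator()(const MyKey& k) const noexcept {
            return k.Hash();
        }
    };

    std::unordered_map<MyKey, int> cache; // the key is now usable directly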
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = 
false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : 
VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, 
state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: 
+ Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
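CurrentGraphicsProgram and CurrentComputeProgram above both memoize with try_emplace: a lookup miss inserts an empty (null) slot, and only then is the expensive program built. A generic sketch of that pattern, assuming the key already provides operator== and a std::hash specialization:

    #include <memory>
    #include <unordered_map>

    // Sketch: return the cached program for key, building it exactly once on
    // first use. build() must return std::unique_ptr<Program>.
    template <typename Key, typename Program, typename Builder>
    Program* GetOrBuild(std::unordered_map<Key, std::unique_ptr<Program>>& cache,
                        const Key& key, Builder&& build) {
        const auto [it, is_new] = cache.try_emplace(key); // null slot on miss
        if (is_new) {
            it->second = build();
        }
        return it->second.get();
    }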
- -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. 
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); - - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. 
- void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // 
R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& 
info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, 
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? 
GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, 
const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format 
= GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + 
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + 
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void 
RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, 
map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using 
Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; ++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + auto 
pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
     if (pipeline_cache_filename.empty()) {
         return pipeline;
     }
-    serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] {
+    serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
         boost::container::static_vector env_ptrs;
         for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 0f15ad2f7..ef14e91e7 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
     return scissor;
 }
 
-struct TextureHandle {
-    constexpr TextureHandle(u32 data, bool via_header_index) {
-        const Tegra::Texture::TextureHandle handle{data};
-        image = handle.tic_id;
-        sampler = via_header_index ? image : handle.tsc_id.Value();
-    }
-
-    u32 image;
-    u32 sampler;
-};
-
 DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
                           bool is_indexed) {
     DrawParams params{
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index b8b8eace5..78bf90c48 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() {
     return MakeShaderInfo(env, *cpu_shader_addr);
 }
 
+void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
+                                          const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
+    size_t env_index{};
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
+        if (unique_hashes[index] == 0) {
+            continue;
+        }
+        const auto program{static_cast(index)};
+        auto& env{result.envs[index]};
+        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
+        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+        env.SetCachedSize(shader_infos[index]->size_bytes);
+        result.env_ptrs[env_index++] = &env;
+    }
+}
+
 ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
     std::scoped_lock lock{lookup_mutex};
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 89a4bcc84..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,14 +4,18 @@
 
 #pragma once
 
+#include <algorithm>
+#include <array>
 #include <memory>
 #include <mutex>
+#include <span>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 
 #include "common/common_types.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/shader_environment.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -30,6 +34,8 @@ class ShaderCache {
     static constexpr u64 PAGE_BITS = 14;
     static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
 
+    static constexpr size_t NUM_PROGRAMS = 6;
+
     struct Entry {
         VAddr addr_start;
         VAddr addr_end;
@@ -58,6 +64,15 @@ public:
     void SyncGuestHost();
 
 protected:
+    struct GraphicsEnvironments {
+        std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
+        std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
+
+        std::span<Shader::Environment* const> Span() const noexcept {
+            return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
+        }
+    };
+
     explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
                          Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
                          Tegra::Engines::KeplerCompute& kepler_compute_);
@@ -65,17 +80,21 @@ protected:
     /// @brief Update the hashes and information of shader stages
     /// @param unique_hashes Shader hashes to store into when a stage is enabled
     /// @return True on success, false on error
-    bool RefreshStages(std::array<u64, 6>& unique_hashes);
+    bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
 
     /// @brief Returns information about the current compute shader
     /// @return Pointer to a valid shader, nullptr on error
     const ShaderInfo* ComputeShader();
 
+    /// @brief Collect the current graphics environments
+    void GetGraphicsEnvironments(GraphicsEnvironments& result,
+                                 const std::array<u64, NUM_PROGRAMS>& unique_hashes);
+
     Tegra::MemoryManager& gpu_memory;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
 
-    std::array<const ShaderInfo*, 6> shader_infos{};
+    std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
     bool last_shaders_valid = false;
 
 private:
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 5dccc0097..c93174519 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() {
 Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
                                                             bool via_header_index, u32 raw) {
-    const TextureHandle handle{raw, via_header_index};
-    const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
+    const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
+    const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
     Tegra::Texture::TICEntry entry;
     gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
     const Shader::TextureType result{ConvertType(entry)};
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 37d712045..d26dbfaab 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -29,22 +29,6 @@ class Memorymanager;
 
 namespace VideoCommon {
 
-struct TextureHandle {
-    explicit TextureHandle(u32 data, bool via_header_index) {
-        if (via_header_index) {
-            image = data;
-            sampler = data;
-        } else {
-            const Tegra::Texture::TextureHandle handle{data};
-            image = handle.tic_id;
-            sampler = via_header_index ? image : handle.tsc_id.Value();
-        }
-    }
-
-    u32 image;
-    u32 sampler;
-};
-
 class GenericEnvironment : public Shader::Environment {
 public:
     explicit GenericEnvironment() = default;
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
     return "Invalid";
 }
 
-std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
+std::string Name(const ImageViewBase& image_view) {
     const u32 width = image_view.size.width;
     const u32 height = image_view.size.height;
     const u32 depth = image_view.size.depth;
     const u32 num_levels = image_view.range.extent.levels;
     const u32 num_layers = image_view.range.extent.layers;
     const std::string level = num_levels > 1 ?
fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, From 7ecc6de56ae01602b25408db8b6658d7a41a419a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 23 Apr 2021 17:47:54 -0400 Subject: [PATCH 326/770] shader: Implement Int32 SUATOM/SURED --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 3 + .../backend/spirv/emit_context.h | 1 + .../backend/spirv/emit_spirv.cpp | 3 + .../backend/spirv/emit_spirv.h | 44 ++++ .../backend/spirv/emit_spirv_image_atomic.cpp | 182 ++++++++++++++++ .../frontend/ir/ir_emitter.cpp | 89 ++++++++ .../frontend/ir/ir_emitter.h | 26 +++ .../frontend/ir/microinstruction.cpp | 33 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 38 ++++ .../frontend/maxwell/maxwell.inc | 3 +- .../frontend/maxwell/translate/impl/impl.h | 1 + .../translate/impl/not_implemented.cpp | 4 - .../impl/surface_atomic_operations.cpp | 204 ++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 36 ++++ src/shader_recompiler/ir_opt/texture_pass.cpp | 68 +++++- src/shader_recompiler/shader_info.h | 2 + 17 files changed, 733 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f20031d98..0bcd714d6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ 
b/src/shader_recompiler/CMakeLists.txt @@ -12,6 +12,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp + backend/spirv/emit_spirv_image_atomic.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp @@ -138,6 +139,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/surface_atomic_operations.cpp frontend/maxwell/translate/impl/surface_load_store.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index e9ffe4955..549df0d4b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1107,6 +1107,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } ++binding; } + if (info.uses_atomic_image_u32) { + image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + } } void EmitContext::DefineImages(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 823ed8525..30b08104d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -198,6 +198,7 @@ public: Id image_buffer_type{}; Id sampled_texture_buffer_type{}; + Id image_u32{}; std::array cbufs{}; std::array ssbos{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 745a834e3..3f9adc902 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -335,6 +335,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if (info.uses_image_buffers) { + ctx.AddCapability(spv::Capability::ImageBuffer); + } if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8f6482b7b..47d62b190 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -509,6 +509,50 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id derivates, Id offset, Id lod_clamp); Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id 
EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); Id EmitLaneId(EmitContext& ctx); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..05bed22b9 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
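+//
+// Lowers the IR's 32-bit image atomic opcodes to SPIR-V. Each helper below
+// builds an OpImageTexelPointer into the bound image (or texel buffer) and
+// issues the matching OpAtomic* instruction with Device scope and relaxed
+// memory semantics. The Bindless*/Bound* variants are stubs that throw; they
+// are expected to be rewritten into the indexed ImageAtomic* form (by the
+// texture pass) before reaching this backend.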
+
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+
+namespace Shader::Backend::SPIRV {
+namespace {
+Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
+    if (!index.IsImmediate()) {
+        throw NotImplementedException("Indirect image indexing");
+    }
+    if (info.type == TextureType::Buffer) {
+        const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())};
+        return def.id;
+    } else {
+        const ImageDefinition def{ctx.images.at(index.U32())};
+        return def.id;
+    }
+}
+
+std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
+    const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
+    const Id semantics{ctx.u32_zero_value};
+    return {scope, semantics};
+}
+
+Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
+                  Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    const Id image{Image(ctx, index, info)};
+    const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+}
+} // Anonymous namespace
+
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd);
+}
+
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin);
+}
+
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin);
+}
+
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax);
+}
+
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax);
+}
+
+Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+    // TODO: This is not yet implemented
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) {
+    // TODO: This is not yet implemented
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd);
+}
+
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr);
+}
+
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor);
+}
+
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                             Id value) {
+    return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange);
+}
+
+Id EmitBindlessImageAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitBindlessImageAtomicSMin32(EmitContext&) {
+
throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBindlessImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitBoundImageAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5913fdeff..354d72c9b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1869,6 +1869,95 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value Inst(op, Flags{info}, handle, coords, color); } +Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32 + : Opcode::BindlessImageAtomicIAdd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 + : Opcode::BindlessImageAtomicSMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicUMin32 + : Opcode::BindlessImageAtomicUMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMin(handle, coords, value, info) + : ImageAtomicUMin(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 + : Opcode::BindlessImageAtomicSMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 + : Opcode::BindlessImageAtomicUMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMax(handle, coords, value, info) + : ImageAtomicUMax(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 + : Opcode::BindlessImageAtomicInc32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 + : Opcode::BindlessImageAtomicDec32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 + : Opcode::BindlessImageAtomicAnd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 + : Opcode::BindlessImageAtomicOr32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 + : Opcode::BindlessImageAtomicXor32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageAtomicExchange32 + : Opcode::BindlessImageAtomicExchange32}; + return Inst(op, Flags{info}, handle, coords, value); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst<U1>(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index a12919283..4e614d424 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -334,6 +334,32 @@ public: [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords, + const Value& value, bool is_signed, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, + const Value& value, bool is_signed, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index dba902186..616ef17d4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -166,6 +166,39 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case
IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b14719c51..9165421f8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -496,6 +496,44 @@ OPCODE(ImageGradient, F32x4, Opaq OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +// Atomic Image operations + +OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) + // Warp operations OPCODE(LaneId, U32, ) OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c759bd4d4..2fee591bb 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -244,7 +244,8 @@ INST(STG, "STG", "1110 1110 1101 1---") INST(STL, "STL", "1110 1111 0101 0---") INST(STP, "STP", "1110 1110 1010 0---") INST(STS, "STS", "1110 
1111 0101 1---") -INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SUATOM, "SUATOM", "1110 1010 0--- ----") +INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bf7d1bae8..335e4f24f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -303,6 +303,7 @@ public: void STL(u64 insn); void STP(u64 insn); void STS(u64 insn); + void SUATOM(u64 insn); void SUATOM_cas(u64 insn); void SULD(u64 insn); void SURED(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4f99bbbe..7e26ab359 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -249,10 +249,6 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SURED(u64) { - ThrowNotImplemented(Opcode::SURED); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..994bdc3eb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,204 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, + bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? 
v.X(bindless_reg) + : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bb4aeb57c..7d8794a7e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -565,6 +565,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageWrite: { const auto flags{inst.Flags<IR::TextureInstInfo>()}; info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; + info.uses_image_buffers |= flags.type == TextureType::Buffer; break; } case IR::Opcode::SubgroupEqMask: @@ -696,6 +697,41 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_storage_buffer_types |= IR::Type::U64; info.uses_int64_bit_atomics = true; break; + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case
IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: + info.uses_atomic_image_u32 = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 2b38bcf42..9e0a2fb09 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -76,6 +76,39 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageWrite: case IR::Opcode::BindlessImageWrite: return IR::Opcode::ImageWrite; + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicIAdd32: + return IR::Opcode::ImageAtomicIAdd32; + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicSMin32: + return IR::Opcode::ImageAtomicSMin32; + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + return IR::Opcode::ImageAtomicUMin32; + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicSMax32: + return IR::Opcode::ImageAtomicSMax32; + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + return IR::Opcode::ImageAtomicUMax32; + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicInc32: + return IR::Opcode::ImageAtomicInc32; + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicDec32: + return IR::Opcode::ImageAtomicDec32; + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicAnd32: + return IR::Opcode::ImageAtomicAnd32; + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicOr32: + return IR::Opcode::ImageAtomicOr32; + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicXor32: + return IR::Opcode::ImageAtomicXor32; + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::BindlessImageAtomicExchange32: + return IR::Opcode::ImageAtomicExchange32; default: return IR::Opcode::Void; } @@ -95,6 +128,17 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGradient: case IR::Opcode::BindlessImageRead: case IR::Opcode::BindlessImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -108,6 +152,17 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: case IR::Opcode::BoundImageRead: case IR::Opcode::BoundImageWrite: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case 
IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: return false; default: throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); @@ -359,11 +414,22 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: case IR::Opcode::ImageWrite: { if (cbuf.has_secondary) { throw NotImplementedException("Unexpected separate sampler"); } - const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; + const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index cb1969b3a..2f6adf714 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -160,6 +160,7 @@ struct Info { bool uses_derivatives{}; bool uses_typeless_image_reads{}; bool uses_typeless_image_writes{}; + bool uses_image_buffers{}; bool uses_shared_increment{}; bool uses_shared_decrement{}; bool uses_global_increment{}; @@ -173,6 +174,7 @@ struct Info { bool uses_atomic_f32x2_max{}; bool uses_int64_bit_atomics{}; bool uses_global_memory{}; + bool uses_atomic_image_u32{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; From 153a77efee629ccdc342e3f3f2fd49488e884233 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 17:16:54 +0200 Subject: [PATCH 327/770] shader: Stub SR_AFFINITY --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 01fb6f5e5..fe3cdfa96 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -159,6 +159,9 @@ enum class SpecialRegister : u64 { return ir.SubgroupGeMask(); case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast<IR::U32>(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + return ir.Imm32(0); // This is the default value hardware returns.
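+ // Editor's note, an assumption not present in the original patch: SR_AFFINITY appears to report where the warp is scheduled on the GPU, so returning the constant hardware default should be safe for shaders that merely read it.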
default: throw NotImplementedException("S2R special register {}", special_register); } From ee61ec2c39e6db53c56e7ac761a2223d99f06908 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 2 May 2021 01:50:27 +0200 Subject: [PATCH 328/770] shader: Optimize NVN Fallthrough --- .../backend/spirv/emit_context.cpp | 3 + .../frontend/maxwell/program.cpp | 7 +- .../ir_opt/collect_shader_info_pass.cpp | 78 +++++++++++++++++-- src/shader_recompiler/shader_info.h | 4 + 4 files changed, 83 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 549df0d4b..be88b76f7 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { AddLabel(); const size_t num_buffers{info.storage_buffers_descriptors.size()}; for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } const auto& ssbo{info.storage_buffers_descriptors[index]}; const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a4fa4319d..0d3f00699 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) { }()}; auto& descs{program.info.storage_buffers_descriptors}; for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } const u32 offset{base + index * descriptor_size}; const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; continue; } - // Assume these are written for now descs.push_back({ .cbuf_index = driver_cbuf, .cbuf_offset = offset, .count = 1, - .is_written = true, + .is_written = program.info.stores_global_memory, }); } } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 7d8794a7e..13b793d57 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) { } } +void CheckCBufNVN(Info& info, IR::Inst& inst) { + const IR::Value cbuf_index{inst.Arg(0)}; + if (!cbuf_index.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 index{cbuf_index.U32()}; + if (index != 0) { + return; + } + const IR::Value cbuf_offset{inst.Arg(1)}; + if (!cbuf_offset.IsImmediate()) { + info.nvn_buffer_used.set(); + return; + } + const u32 offset{cbuf_offset.U32()}; + const u32 descriptor_size{0x10}; + const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; + // Immediate cbuf0 accesses inside the 16-entry table of 0x10-byte descriptors mark only the matching slot as used. + if (offset >= info.nvn_buffer_base && offset < upper_limit) { + const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; + info.nvn_buffer_used.set(nvn_index, true); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.GetOpcode()) { - case IR::Opcode::LoadGlobalU8: - case IR::Opcode::LoadGlobalS8: - case
IR::Opcode::LoadGlobalU16: - case IR::Opcode::LoadGlobalS16: - case IR::Opcode::LoadGlobal32: - case IR::Opcode::LoadGlobal64: - case IR::Opcode::LoadGlobal128: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU16: @@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::GlobalAtomicMaxF32x2: + info.stores_global_memory = true; + [[fallthrough]]; + case IR::Opcode::LoadGlobalU8: + case IR::Opcode::LoadGlobalS8: + case IR::Opcode::LoadGlobalU16: + case IR::Opcode::LoadGlobalS16: + case IR::Opcode::LoadGlobal32: + case IR::Opcode::LoadGlobal64: + case IR::Opcode::LoadGlobal128: info.uses_int64 = true; info.uses_global_memory = true; info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; @@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { } } +void VisitCbufs(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetCbufU8: + case IR::Opcode::GetCbufS8: + case IR::Opcode::GetCbufU16: + case IR::Opcode::GetCbufS16: + case IR::Opcode::GetCbufU32: + case IR::Opcode::GetCbufF32: + case IR::Opcode::GetCbufU32x2: { + CheckCBufNVN(info, inst); + break; + } + default: + break; + } +} + void Visit(Info& info, IR::Inst& inst) { VisitUsages(info, inst); VisitFpModifiers(info, inst); + VisitCbufs(info, inst); } void GatherInfoFromHeader(Environment& env, Info& info) { @@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) { void CollectShaderInfoPass(Environment& env, IR::Program& program) { Info& info{program.info}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + info.nvn_buffer_base = base; + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { Visit(info, inst); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 2f6adf714..a50a9a18c 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <bitset> #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" @@ -140,6 +141,7 @@ struct Info { bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; + bool stores_global_memory{}; bool uses_fp16{}; bool uses_fp64{}; @@ -180,6 +182,8 @@ struct Info { IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; + u32 nvn_buffer_base{}; + std::bitset<16> nvn_buffer_used{}; boost::container::static_vector constant_buffer_descriptors; From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: [PATCH 329/770] Move SPIR-V emission functions to their own header --- src/shader_recompiler/CMakeLists.txt | 2 + src/shader_recompiler/backend/bindings.h | 19 + .../backend/spirv/emit_context.h | 9 +- .../backend/spirv/emit_spirv.cpp | 1 + .../backend/spirv/emit_spirv.h | 571 +---------------- .../backend/spirv/emit_spirv_atomic.cpp | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 1 +
.../backend/spirv/emit_spirv_composite.cpp | 1 + .../spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 1 + .../backend/spirv/emit_spirv_convert.cpp | 1 + .../spirv/emit_spirv_floating_point.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 1 + .../backend/spirv/emit_spirv_image_atomic.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 583 ++++++++++++++++++ .../backend/spirv/emit_spirv_integer.cpp | 1 + .../backend/spirv/emit_spirv_logical.cpp | 1 + .../backend/spirv/emit_spirv_memory.cpp | 1 + .../backend/spirv/emit_spirv_select.cpp | 1 + .../spirv/emit_spirv_shared_memory.cpp | 1 + .../backend/spirv/emit_spirv_special.cpp | 1 + .../backend/spirv/emit_spirv_undefined.cpp | 1 + .../backend/spirv/emit_spirv_warp.cpp | 1 + .../renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- 26 files changed, 637 insertions(+), 579 deletions(-) create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 0bcd714d6..6523615aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(shader_recompiler STATIC + backend/bindings.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -13,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
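+// Editor's sketch, not part of the original patch: Bindings appears to carry the next free slot per +// resource class so successive EmitSPIRV calls keep allocating unique binding indices across a +// pipeline's stages. Hypothetical usage (names assumed for illustration): +//   Bindings binding; +//   const std::vector<u32> vs_spv{EmitSPIRV(profile, vs_program, binding)}; +//   const std::vector<u32> fs_spv{EmitSPIRV(profile, fs_program, binding)}; // continues after the VS slots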
+ +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 30b08104d..8b000f1ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -9,6 +9,7 @@ #include <sirit/sirit.h> +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -17,14 +18,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -struct Bindings { - u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3f9adc902..0681dfd16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -9,6 +9,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 47d62b190..d8ab2d8ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,9 +4,12 @@ #pragma once +#include + #include + #include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/spirv/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -16,569 +19,9 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding); -// Microinstruction emitters -Id EmitPhi(EmitContext& ctx, IR::Inst* inst); -void EmitVoid(EmitContext& ctx); -Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); -void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitBarrier(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitPrologue(EmitContext& ctx); -void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); -void EmitGetRegister(EmitContext& ctx); -void EmitSetRegister(EmitContext& ctx); -void EmitGetPred(EmitContext& ctx); -void EmitSetPred(EmitContext& ctx); -void EmitSetGotoVariable(EmitContext& ctx); -void EmitGetGotoVariable(EmitContext& ctx); -void EmitSetIndirectBranchVariable(EmitContext& ctx); -void EmitGetIndirectBranchVariable(EmitContext& ctx);
-Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -void EmitGetZFlag(EmitContext& ctx); -void EmitGetSFlag(EmitContext& ctx); -void EmitGetCFlag(EmitContext& ctx); -void EmitGetOFlag(EmitContext& ctx); -void EmitSetZFlag(EmitContext& ctx); -void EmitSetSFlag(EmitContext& ctx); -void EmitSetCFlag(EmitContext& ctx); -void EmitSetOFlag(EmitContext& ctx); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitIsHelperInvocation(EmitContext& ctx); -Id EmitYDirection(EmitContext& ctx); -Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -Id EmitLoadGlobal32(EmitContext& ctx, Id address); -Id EmitLoadGlobal64(EmitContext& ctx, Id address); -Id EmitLoadGlobal128(EmitContext& ctx, Id address); -void EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); -Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void 
EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); -Id EmitLoadSharedU32(EmitContext& ctx, Id offset); -Id EmitLoadSharedU64(EmitContext& ctx, Id offset); -Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -void EmitCompositeConstructF64x2(EmitContext& ctx); -void EmitCompositeConstructF64x3(EmitContext& ctx); -void EmitCompositeConstructF64x4(EmitContext& 
ctx); -void EmitCompositeExtractF64x2(EmitContext& ctx); -void EmitCompositeExtractF64x3(EmitContext& ctx); -void EmitCompositeExtractF64x4(EmitContext& ctx); -Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); -Id EmitBitCastU32F32(EmitContext& ctx, Id value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& ctx); -Id EmitPackUint2x32(EmitContext& ctx, Id value); -Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x16(EmitContext& ctx, Id value); -Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); -Id EmitPackHalf2x16(EmitContext& ctx, Id value); -Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); -Id EmitPackDouble2x32(EmitContext& ctx, Id value); -Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); -void EmitGetZeroFromOp(EmitContext& ctx); -void EmitGetSignFromOp(EmitContext& ctx); -void EmitGetCarryFromOp(EmitContext& ctx); -void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitGetSparseFromOp(EmitContext& ctx); -void EmitGetInBoundsFromOp(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); -Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id 
EmitFPSaturate64(EmitContext& ctx, Id value);
-Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
-Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
-Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
-Id EmitFPRoundEven16(EmitContext& ctx, Id value);
-Id EmitFPRoundEven32(EmitContext& ctx, Id value);
-Id EmitFPRoundEven64(EmitContext& ctx, Id value);
-Id EmitFPFloor16(EmitContext& ctx, Id value);
-Id EmitFPFloor32(EmitContext& ctx, Id value);
-Id EmitFPFloor64(EmitContext& ctx, Id value);
-Id EmitFPCeil16(EmitContext& ctx, Id value);
-Id EmitFPCeil32(EmitContext& ctx, Id value);
-Id EmitFPCeil64(EmitContext& ctx, Id value);
-Id EmitFPTrunc16(EmitContext& ctx, Id value);
-Id EmitFPTrunc32(EmitContext& ctx, Id value);
-Id EmitFPTrunc64(EmitContext& ctx, Id value);
-Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitFPIsNan16(EmitContext& ctx, Id value);
-Id EmitFPIsNan32(EmitContext& ctx, Id value);
-Id EmitFPIsNan64(EmitContext& ctx, Id value);
-Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
-Id EmitISub32(EmitContext& ctx, Id a, Id b);
-Id EmitISub64(EmitContext& ctx, Id a, Id b);
-Id EmitIMul32(EmitContext& ctx, Id a, Id b);
-Id EmitINeg32(EmitContext& ctx, Id value);
-Id EmitINeg64(EmitContext& ctx, Id value);
-Id EmitIAbs32(EmitContext& ctx, Id value);
-Id EmitIAbs64(EmitContext& ctx, Id value);
-Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
-Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
-Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
-Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
-Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
-Id EmitBitReverse32(EmitContext& ctx, Id value);
-Id EmitBitCount32(EmitContext& ctx, Id value);
-Id EmitBitwiseNot32(EmitContext& ctx, Id value);
-Id EmitFindSMsb32(EmitContext& ctx, Id value);
-Id EmitFindUMsb32(EmitContext& ctx, Id value);
-Id EmitSMin32(EmitContext& ctx, Id a, Id b);
-Id EmitUMin32(EmitContext& ctx, Id a, Id b);
-Id EmitSMax32(EmitContext& ctx, Id a, Id b);
-Id EmitUMax32(EmitContext& ctx, Id a, Id b);
-Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
-Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
-Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
-Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                         Id value);
-Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               Id value);
-Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                         Id value);
-Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                          Id value);
-Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               Id value);
-Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           Id value);
-Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             Id value);
-Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
-Id EmitGlobalAtomicSMin32(EmitContext& ctx);
-Id EmitGlobalAtomicUMin32(EmitContext& ctx);
-Id EmitGlobalAtomicSMax32(EmitContext& ctx);
-Id EmitGlobalAtomicUMax32(EmitContext& ctx);
-Id EmitGlobalAtomicInc32(EmitContext& ctx);
-Id EmitGlobalAtomicDec32(EmitContext& ctx);
-Id EmitGlobalAtomicAnd32(EmitContext& ctx);
-Id EmitGlobalAtomicOr32(EmitContext& ctx);
-Id EmitGlobalAtomicXor32(EmitContext& ctx);
-Id EmitGlobalAtomicExchange32(EmitContext& ctx);
-Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
-Id EmitGlobalAtomicSMin64(EmitContext& ctx);
-Id EmitGlobalAtomicUMin64(EmitContext& ctx);
-Id EmitGlobalAtomicSMax64(EmitContext& ctx);
-Id EmitGlobalAtomicUMax64(EmitContext& ctx);
-Id EmitGlobalAtomicInc64(EmitContext& ctx);
-Id EmitGlobalAtomicDec64(EmitContext& ctx);
-Id EmitGlobalAtomicAnd64(EmitContext& ctx);
-Id EmitGlobalAtomicOr64(EmitContext& ctx);
-Id EmitGlobalAtomicXor64(EmitContext& ctx);
-Id EmitGlobalAtomicExchange64(EmitContext& ctx);
-Id EmitGlobalAtomicAddF32(EmitContext& ctx);
-Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
-Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
-Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
-Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
-Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
-Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
-Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalNot(EmitContext& ctx, Id value);
-Id EmitConvertS16F16(EmitContext& ctx, Id value);
-Id EmitConvertS16F32(EmitContext& ctx, Id value);
-Id EmitConvertS16F64(EmitContext& ctx, Id value);
-Id EmitConvertS32F16(EmitContext& ctx, Id value);
-Id EmitConvertS32F32(EmitContext& ctx, Id value);
-Id EmitConvertS32F64(EmitContext& ctx, Id value);
-Id EmitConvertS64F16(EmitContext& ctx, Id value);
-Id EmitConvertS64F32(EmitContext& ctx, Id value);
-Id EmitConvertS64F64(EmitContext& ctx, Id value);
-Id EmitConvertU16F16(EmitContext& ctx, Id value);
-Id EmitConvertU16F32(EmitContext& ctx, Id value);
-Id EmitConvertU16F64(EmitContext& ctx, Id value);
-Id EmitConvertU32F16(EmitContext& ctx, Id value);
-Id EmitConvertU32F32(EmitContext& ctx, Id value);
-Id EmitConvertU32F64(EmitContext& ctx, Id value);
-Id EmitConvertU64F16(EmitContext& ctx, Id value);
-Id EmitConvertU64F32(EmitContext& ctx, Id value);
-Id EmitConvertU64F64(EmitContext& ctx, Id value);
-Id EmitConvertU64U32(EmitContext& ctx, Id value);
-Id EmitConvertU32U64(EmitContext& ctx, Id value);
-Id EmitConvertF16F32(EmitContext& ctx, Id value);
-Id EmitConvertF32F16(EmitContext& ctx, Id value);
-Id EmitConvertF32F64(EmitContext& ctx, Id value);
-Id EmitConvertF64F32(EmitContext& ctx, Id value);
-Id EmitConvertF16S8(EmitContext& ctx, Id value);
-Id EmitConvertF16S16(EmitContext& ctx, Id value);
-Id EmitConvertF16S32(EmitContext& ctx, Id value);
-Id EmitConvertF16S64(EmitContext& ctx, Id value);
-Id EmitConvertF16U8(EmitContext& ctx, Id value);
-Id EmitConvertF16U16(EmitContext& ctx, Id value);
-Id EmitConvertF16U32(EmitContext& ctx, Id value);
-Id EmitConvertF16U64(EmitContext& ctx, Id value);
-Id EmitConvertF32S8(EmitContext& ctx, Id value);
-Id EmitConvertF32S16(EmitContext& ctx, Id value);
-Id EmitConvertF32S32(EmitContext& ctx, Id value);
-Id EmitConvertF32S64(EmitContext& ctx, Id value);
-Id EmitConvertF32U8(EmitContext& ctx, Id value);
-Id EmitConvertF32U16(EmitContext& ctx, Id value);
-Id EmitConvertF32U32(EmitContext& ctx, Id value);
-Id EmitConvertF32U64(EmitContext& ctx, Id value);
-Id EmitConvertF64S8(EmitContext& ctx, Id value);
-Id EmitConvertF64S16(EmitContext& ctx, Id value);
-Id EmitConvertF64S32(EmitContext& ctx, Id value);
-Id EmitConvertF64S64(EmitContext& ctx, Id value);
-Id EmitConvertF64U8(EmitContext& ctx, Id value);
-Id EmitConvertF64U16(EmitContext& ctx, Id value);
-Id EmitConvertF64U32(EmitContext& ctx, Id value);
-Id EmitConvertF64U64(EmitContext& ctx, Id value);
-Id EmitBindlessImageSampleImplicitLod(EmitContext&);
-Id EmitBindlessImageSampleExplicitLod(EmitContext&);
-Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
-Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
-Id EmitBindlessImageGather(EmitContext&);
-Id EmitBindlessImageGatherDref(EmitContext&);
-Id EmitBindlessImageFetch(EmitContext&);
-Id EmitBindlessImageQueryDimensions(EmitContext&);
-Id EmitBindlessImageQueryLod(EmitContext&);
-Id EmitBindlessImageGradient(EmitContext&);
-Id EmitBindlessImageRead(EmitContext&);
-Id EmitBindlessImageWrite(EmitContext&);
-Id EmitBoundImageSampleImplicitLod(EmitContext&);
-Id EmitBoundImageSampleExplicitLod(EmitContext&);
-Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
-Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
-Id EmitBoundImageGather(EmitContext&);
-Id EmitBoundImageGatherDref(EmitContext&);
-Id EmitBoundImageFetch(EmitContext&);
-Id EmitBoundImageQueryDimensions(EmitContext&);
-Id EmitBoundImageQueryLod(EmitContext&);
-Id EmitBoundImageGradient(EmitContext&);
-Id EmitBoundImageRead(EmitContext&);
-Id EmitBoundImageWrite(EmitContext&);
-Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id bias_lc, const IR::Value& offset);
-Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                              Id lod_lc, const IR::Value& offset);
-Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
-                                  Id coords, Id dref, Id bias_lc, const IR::Value& offset);
-Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
-                                  Id coords, Id dref, Id lod_lc, const IR::Value& offset);
-Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                   const IR::Value& offset, const IR::Value& offset2);
-Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                       const IR::Value& offset, const IR::Value& offset2, Id dref);
-Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
-                  Id lod, Id ms);
-Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
-Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
-Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp);
-Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
-void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
-Id EmitBindlessImageAtomicIAdd32(EmitContext&);
-Id EmitBindlessImageAtomicSMin32(EmitContext&);
-Id EmitBindlessImageAtomicUMin32(EmitContext&);
-Id EmitBindlessImageAtomicSMax32(EmitContext&);
-Id EmitBindlessImageAtomicUMax32(EmitContext&);
-Id EmitBindlessImageAtomicInc32(EmitContext&);
-Id EmitBindlessImageAtomicDec32(EmitContext&);
-Id EmitBindlessImageAtomicAnd32(EmitContext&);
-Id EmitBindlessImageAtomicOr32(EmitContext&);
-Id EmitBindlessImageAtomicXor32(EmitContext&);
-Id EmitBindlessImageAtomicExchange32(EmitContext&);
-Id EmitBoundImageAtomicIAdd32(EmitContext&);
-Id EmitBoundImageAtomicSMin32(EmitContext&);
-Id EmitBoundImageAtomicUMin32(EmitContext&);
-Id EmitBoundImageAtomicSMax32(EmitContext&);
-Id EmitBoundImageAtomicUMax32(EmitContext&);
-Id EmitBoundImageAtomicInc32(EmitContext&);
-Id EmitBoundImageAtomicDec32(EmitContext&);
-Id EmitBoundImageAtomicAnd32(EmitContext&);
-Id EmitBoundImageAtomicOr32(EmitContext&);
-Id EmitBoundImageAtomicXor32(EmitContext&);
-Id EmitBoundImageAtomicExchange32(EmitContext&);
-Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                         Id value);
-Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                         Id value);
-Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                         Id value);
-Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                         Id value);
-Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                         Id value);
-Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                        Id value);
-Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                        Id value);
-Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                        Id value);
-Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                       Id value);
-Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                        Id value);
-Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                             Id value);
-Id EmitLaneId(EmitContext& ctx);
-Id EmitVoteAll(EmitContext& ctx, Id pred);
-Id EmitVoteAny(EmitContext& ctx, Id pred);
-Id EmitVoteEqual(EmitContext& ctx, Id pred);
-Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
-Id EmitSubgroupEqMask(EmitContext& ctx);
-Id EmitSubgroupLtMask(EmitContext& ctx);
-Id EmitSubgroupLeMask(EmitContext& ctx);
-Id EmitSubgroupGtMask(EmitContext& ctx);
-Id EmitSubgroupGeMask(EmitContext& ctx);
-Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
-                    Id segmentation_mask);
-Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
-                 Id segmentation_mask);
-Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
-                   Id segmentation_mask);
-Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
-                        Id segmentation_mask);
-Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
-
-Id EmitDPdxFine(EmitContext& ctx, Id op_a);
-
-Id EmitDPdyFine(EmitContext& ctx, Id op_a);
-
-Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
-
-Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
+[[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) {
+    Bindings binding;
+    return EmitSPIRV(profile, program, binding);
+}
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 6e17d1c7e..053800eb7 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
index 705aebd81..e0b52a001 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 93a45d834..bb11f4f4e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index 079e226de..10ff4ecab 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index ef32184ea..8e57ff070 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -6,6 +6,7 @@
 #include
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
index b4a6fbb93..6154c46be 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
index acb8957fe..fd74e475f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index b3afbef25..61cf25f9c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 6680cf1b3..5832104df 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -5,6 +5,7 @@
 #include
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
index 05bed22b9..d7f1a365a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/frontend/ir/modifiers.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
new file mode 100644
index 000000000..b5eec3cd1
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -0,0 +1,583 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+
+namespace IR {
+enum class Attribute : u64;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace IR
+
+namespace Shader::Backend::SPIRV {
+
+using Sirit::Id;
+
+class EmitContext;
+
+// Microinstruction emitters
+Id EmitPhi(EmitContext& ctx, IR::Inst* inst);
+void EmitVoid(EmitContext& ctx);
+Id EmitIdentity(EmitContext& ctx, const IR::Value& value);
+void EmitBranch(EmitContext& ctx, Id label);
+void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label);
+void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label);
+void EmitSelectionMerge(EmitContext& ctx, Id merge_label);
+void EmitReturn(EmitContext& ctx);
+void EmitJoin(EmitContext& ctx);
+void EmitUnreachable(EmitContext& ctx);
+void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream);
+void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream);
+void EmitGetRegister(EmitContext& ctx);
+void EmitSetRegister(EmitContext& ctx);
+void EmitGetPred(EmitContext& ctx);
+void EmitSetPred(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+void EmitSetIndirectBranchVariable(EmitContext& ctx);
+void EmitGetIndirectBranchVariable(EmitContext& ctx);
+Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
+Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
+void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
+void EmitGetZFlag(EmitContext& ctx);
+void EmitGetSFlag(EmitContext& ctx);
+void EmitGetCFlag(EmitContext& ctx);
+void EmitGetOFlag(EmitContext& ctx);
+void EmitSetZFlag(EmitContext& ctx);
+void EmitSetSFlag(EmitContext& ctx);
+void EmitSetCFlag(EmitContext& ctx);
+void EmitSetOFlag(EmitContext& ctx);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
+Id EmitSampleId(EmitContext& ctx);
+Id EmitIsHelperInvocation(EmitContext& ctx);
+Id EmitYDirection(EmitContext& ctx);
+Id EmitLoadLocal(EmitContext& ctx, Id word_offset);
+void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+void EmitLoadGlobalU8(EmitContext& ctx);
+void EmitLoadGlobalS8(EmitContext& ctx);
+void EmitLoadGlobalU16(EmitContext& ctx);
+void EmitLoadGlobalS16(EmitContext& ctx);
+Id EmitLoadGlobal32(EmitContext& ctx, Id address);
+Id EmitLoadGlobal64(EmitContext& ctx, Id address);
+Id EmitLoadGlobal128(EmitContext& ctx, Id address);
+void EmitWriteGlobalU8(EmitContext& ctx);
+void EmitWriteGlobalS8(EmitContext& ctx);
+void EmitWriteGlobalU16(EmitContext& ctx);
+void EmitWriteGlobalS16(EmitContext& ctx);
+void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
+void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
+Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
+void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
+void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
+void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                        Id value);
+void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+Id EmitLoadSharedU8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS8(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedS16(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+Id EmitLoadSharedU128(EmitContext& ctx, Id offset);
+void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
+void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value);
+Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
+Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+void EmitBitCastU16F16(EmitContext& ctx);
+Id EmitBitCastU32F32(EmitContext& ctx, Id value);
+void EmitBitCastU64F64(EmitContext& ctx);
+void EmitBitCastF16U16(EmitContext& ctx);
+Id EmitBitCastF32U32(EmitContext& ctx, Id value);
+void EmitBitCastF64U64(EmitContext& ctx);
+Id EmitPackUint2x32(EmitContext& ctx, Id value);
+Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
+Id EmitPackFloat2x16(EmitContext& ctx, Id value);
+Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
+Id EmitPackHalf2x16(EmitContext& ctx, Id value);
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackDouble2x32(EmitContext& ctx, Id value);
+Id EmitUnpackDouble2x32(EmitContext& ctx, Id value);
+void EmitGetZeroFromOp(EmitContext& ctx);
+void EmitGetSignFromOp(EmitContext& ctx);
+void EmitGetCarryFromOp(EmitContext& ctx);
+void EmitGetOverflowFromOp(EmitContext& ctx);
+void EmitGetSparseFromOp(EmitContext& ctx);
+void EmitGetInBoundsFromOp(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPIsNan16(EmitContext& ctx, Id value);
+Id EmitFPIsNan32(EmitContext& ctx, Id value);
+Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+Id EmitISub64(EmitContext& ctx, Id a, Id b);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitINeg32(EmitContext& ctx, Id value);
+Id EmitINeg64(EmitContext& ctx, Id value);
+Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitIAbs64(EmitContext& ctx, Id value);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitReverse32(EmitContext& ctx, Id value);
+Id EmitBitCount32(EmitContext& ctx, Id value);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitSMin32(EmitContext& ctx, Id a, Id b);
+Id EmitUMin32(EmitContext& ctx, Id a, Id b);
+Id EmitSMax32(EmitContext& ctx, Id a, Id b);
+Id EmitUMax32(EmitContext& ctx, Id a, Id b);
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value);
+Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                         Id value);
+Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                          Id value);
+Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                               Id value);
+Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32(EmitContext& ctx);
+Id EmitGlobalAtomicInc32(EmitContext& ctx);
+Id EmitGlobalAtomicDec32(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32(EmitContext& ctx);
+Id EmitGlobalAtomicOr32(EmitContext& ctx);
+Id EmitGlobalAtomicXor32(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
+Id EmitGlobalAtomicSMin64(EmitContext& ctx);
+Id EmitGlobalAtomicUMin64(EmitContext& ctx);
+Id EmitGlobalAtomicSMax64(EmitContext& ctx);
+Id EmitGlobalAtomicUMax64(EmitContext& ctx);
+Id EmitGlobalAtomicInc64(EmitContext& ctx);
+Id EmitGlobalAtomicDec64(EmitContext& ctx);
+Id EmitGlobalAtomicAnd64(EmitContext& ctx);
+Id EmitGlobalAtomicOr64(EmitContext& ctx);
+Id EmitGlobalAtomicXor64(EmitContext& ctx);
+Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32(EmitContext& ctx);
+Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
+Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
+Id EmitConvertF16F32(EmitContext& ctx, Id value);
+Id EmitConvertF32F16(EmitContext& ctx, Id value);
+Id EmitConvertF32F64(EmitContext& ctx, Id value);
+Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitBindlessImageSampleImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleExplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBindlessImageGather(EmitContext&);
+Id EmitBindlessImageGatherDref(EmitContext&);
+Id EmitBindlessImageFetch(EmitContext&);
+Id EmitBindlessImageQueryDimensions(EmitContext&);
+Id EmitBindlessImageQueryLod(EmitContext&);
+Id EmitBindlessImageGradient(EmitContext&);
+Id EmitBindlessImageRead(EmitContext&);
+Id EmitBindlessImageWrite(EmitContext&);
+Id EmitBoundImageSampleImplicitLod(EmitContext&);
+Id EmitBoundImageSampleExplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
+Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
+Id EmitBoundImageGather(EmitContext&);
+Id EmitBoundImageGatherDref(EmitContext&);
+Id EmitBoundImageFetch(EmitContext&);
+Id EmitBoundImageQueryDimensions(EmitContext&);
+Id EmitBoundImageQueryLod(EmitContext&);
+Id EmitBoundImageGradient(EmitContext&);
+Id EmitBoundImageRead(EmitContext&);
+Id EmitBoundImageWrite(EmitContext&);
+Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                              Id lod_lc, const IR::Value& offset);
+Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id bias_lc, const IR::Value& offset);
+Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
+                                  Id coords, Id dref, Id lod_lc, const IR::Value& offset);
+Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                   const IR::Value& offset, const IR::Value& offset2);
+Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       const IR::Value& offset, const IR::Value& offset2, Id dref);
+Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
+                  Id lod, Id ms);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
+Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                     Id derivates, Id offset, Id lod_clamp);
+Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
+void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
+Id EmitBindlessImageAtomicIAdd32(EmitContext&);
+Id EmitBindlessImageAtomicSMin32(EmitContext&);
+Id EmitBindlessImageAtomicUMin32(EmitContext&);
+Id EmitBindlessImageAtomicSMax32(EmitContext&);
+Id EmitBindlessImageAtomicUMax32(EmitContext&);
+Id EmitBindlessImageAtomicInc32(EmitContext&);
+Id EmitBindlessImageAtomicDec32(EmitContext&);
+Id EmitBindlessImageAtomicAnd32(EmitContext&);
+Id EmitBindlessImageAtomicOr32(EmitContext&);
+Id EmitBindlessImageAtomicXor32(EmitContext&);
+Id EmitBindlessImageAtomicExchange32(EmitContext&);
+Id EmitBoundImageAtomicIAdd32(EmitContext&);
+Id EmitBoundImageAtomicSMin32(EmitContext&);
+Id EmitBoundImageAtomicUMin32(EmitContext&);
+Id EmitBoundImageAtomicSMax32(EmitContext&);
+Id EmitBoundImageAtomicUMax32(EmitContext&);
+Id EmitBoundImageAtomicInc32(EmitContext&);
+Id EmitBoundImageAtomicDec32(EmitContext&);
+Id EmitBoundImageAtomicAnd32(EmitContext&);
+Id EmitBoundImageAtomicOr32(EmitContext&);
+Id EmitBoundImageAtomicXor32(EmitContext&);
+Id EmitBoundImageAtomicExchange32(EmitContext&);
+Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                         Id value);
+Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                       Id value);
+Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                        Id value);
+Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
+                             Id value);
+Id EmitLaneId(EmitContext& ctx);
+Id EmitVoteAll(EmitContext& ctx, Id pred);
+Id EmitVoteAny(EmitContext& ctx, Id pred);
+Id EmitVoteEqual(EmitContext& ctx, Id pred);
+Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
+Id EmitSubgroupEqMask(EmitContext& ctx);
+Id EmitSubgroupLtMask(EmitContext& ctx);
+Id EmitSubgroupLeMask(EmitContext& ctx);
+Id EmitSubgroupGtMask(EmitContext& ctx);
+Id EmitSubgroupGeMask(EmitContext& ctx);
+Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                    Id segmentation_mask);
+Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                 Id segmentation_mask);
+Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                   Id segmentation_mask);
+Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
+                        Id segmentation_mask);
+Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
+Id EmitDPdxFine(EmitContext& ctx, Id op_a);
+Id EmitDPdyFine(EmitContext& ctx, Id op_a);
+Id EmitDPdxCoarse(EmitContext& ctx, Id op_a);
+Id EmitDPdyCoarse(EmitContext& ctx, Id op_a);
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 86e6a4f3b..06ab23b1d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
index bb434def2..b9a9500fc 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index a6a3f3351..37a66095f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -5,6 +5,7 @@
 #include
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index 0b45db45e..c5b4f4720 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index 710d1cd25..9a79fc7a2 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
index d5430e905..ba948f3c9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
index 19b06dbe4..c9f469e90 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 239e2ecab..78b1e1ba7 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 
 namespace Shader::Backend::SPIRV {
 namespace {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c9ca1f005..6585817bc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram(
     OGLProgram gl_program;
     gl_program.handle = glCreateProgram();
 
-    Shader::Backend::SPIRV::Bindings binding;
+    Shader::Backend::Bindings binding;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         if (key.unique_hashes[index] == 0) {
             continue;
         }
@@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
 
-    Shader::Backend::SPIRV::Bindings binding;
-    const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
+    const std::vector<u32> code{EmitSPIRV(profile, program)};
     OGLProgram gl_program;
     gl_program.handle = glCreateProgram();
     AddShader(GL_COMPUTE_SHADER, gl_program.handle, code);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 30b71bdbc..a5edcd072 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline(
     std::array infos{};
     std::array modules;
 
-    Shader::Backend::SPIRV::Bindings binding;
-    for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) {
+    Shader::Backend::Bindings binding;
+    for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
+         ++index) {
         if (key.unique_hashes[index] == 0) {
             continue;
         }
@@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline(
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
 
-    Shader::Backend::SPIRV::Bindings binding;
-    const std::vector<u32> code{EmitSPIRV(base_profile, program, binding)};
+    const std::vector<u32> code{EmitSPIRV(base_profile, program)};
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {

From b725db870984e1335e080405cf4b9ed8ef936ae4 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 3 May 2021 22:34:56 -0300
Subject: [PATCH 330/770] shader: Fixup SPIR-V emit header namespaces

---
 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index b5eec3cd1..a1ca3f43d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -6,12 +6,12 @@
 
 #include "common/common_types.h"
 
-namespace IR {
+namespace Shader::IR {
 enum class Attribute : u64;
 enum class Patch : u64;
 class Inst;
 class Value;
-} // namespace IR
+} // namespace Shader::IR
 
 namespace Shader::Backend::SPIRV {

From bfa47539f6d5779a80d6fb23ae49c1d34e01ae93 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Wed, 5 May 2021 01:08:16 -0300
Subject: [PATCH 331/770] gl_shader_cache: Remove code unintentionally committed

---
 src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6585817bc..9bbdfeb62 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -266,9 +266,6 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram(
         infos[stage_index] = &program.info;
 
         const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
-        FILE* file = fopen("D:\\shader.spv", "wb");
-        fwrite(code.data(), 4, code.size(), file);
-        fclose(file);
         AddShader(Stage(stage_index), gl_program.handle, code);
     }
     LinkProgram(gl_program.handle);

From 09dc23f97188a4fa5ea03556a9187bfbefef1d78 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 5 May 2021 00:37:05 -0400
Subject: [PATCH 332/770] shader: ISET.X implementation

---
 .../impl/integer_compare_and_set.cpp | 66 ++++++++++++++++---
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
index a2cd8d7c6..34fa7345c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -9,7 +9,56 @@
 
 namespace Shader::Maxwell {
 namespace {
-void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
+IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
+                              CompareOp compare_op, bool is_signed) {
+    const IR::U32 zero{ir.Imm32(0)};
+    const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
+    const IR::U1 z_flag{ir.GetZFlag()};
z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; @@ -24,27 +73,28 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<49, 3, CompareOp> compare_op; } const iset{insn}; - if (iset.x != 0) { - throw NotImplementedException("ISET.X"); - } - - const IR::U32 src_reg{v.X(iset.src_reg)}; + const IR::U32 src_a{v.X(iset.src_reg)}; const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } const IR::U1 is_zero{v.ir.IEqual(result, zero)}; v.SetZFlag(is_zero); if (iset.bf != 0) {
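ExtendedIntegerCompare above redoes the subtraction as operand_1 + ~operand_2 + carry, where carry is the C flag produced by the instruction that handled the lower 32 bits; classifying that intermediate (plus the Z flag for the equality cases) is what lets a chain of 32-bit operations compare wider integers. A minimal host-side sketch of the borrow chain for the unsigned less-than case (the function and variable names are invented for illustration; this models the arithmetic, not the translator's exact flag plumbing):

    #include <cstdint>
    #include <cstdio>

    // Compare two 64-bit unsigned values using only 32-bit halves, the way a
    // low-half operation with .CC followed by a high-half ISET.X would.
    bool LessThan64ViaHalves(std::uint64_t a, std::uint64_t b) {
        const std::uint32_t a_lo = static_cast<std::uint32_t>(a);
        const std::uint32_t b_lo = static_cast<std::uint32_t>(b);
        const std::uint32_t a_hi = static_cast<std::uint32_t>(a >> 32);
        const std::uint32_t b_hi = static_cast<std::uint32_t>(b >> 32);
        // Truncate ~x back to 32 bits; integer promotion would otherwise
        // sign-extend it in the 64-bit sums below.
        const std::uint32_t not_b_lo = ~b_lo;
        const std::uint32_t not_b_hi = ~b_hi;
        // Low halves: a_lo + ~b_lo + 1 is a_lo - b_lo; the carry-out is the
        // "no borrow" signal the hardware latches in the C flag.
        const std::uint64_t low = std::uint64_t{a_lo} + not_b_lo + 1;
        const std::uint32_t carry = static_cast<std::uint32_t>(low >> 32);
        // High halves: fold the carry in, as ExtendedIntegerCompare does.
        const std::uint64_t high = std::uint64_t{a_hi} + not_b_hi + carry;
        // No carry-out of the top half means the full subtraction borrowed,
        // i.e. a < b for unsigned operands.
        return (high >> 32) == 0;
    }

    int main() {
        std::printf("%d %d %d\n", LessThan64ViaHalves(1, 2),          // 1
                    LessThan64ViaHalves(5, 3),                        // 0
                    LessThan64ViaHalves(std::uint64_t{1} << 40, 1));  // 0
    }

The signed path in the translated code additionally XORs the operand signs into the comparison (the flip_logic select) to compensate for overflow of the intermediate; the sketch omits that case.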
From b10cf64c486d8730fcfeb53a333814915b3b5fbe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 5 May 2021 02:19:08 -0300 Subject: [PATCH 333/770] glasm: Add GLASM backend infrastructure --- src/shader_recompiler/CMakeLists.txt | 34 +- .../backend/glasm/emit_context.cpp | 7 + .../backend/glasm/emit_context.h | 21 + .../backend/glasm/emit_glasm.cpp | 95 + .../backend/glasm/emit_glasm.h | 23 + .../backend/glasm/emit_glasm_atomic.cpp | 0 .../backend/glasm/emit_glasm_barriers.cpp | 0 .../glasm/emit_glasm_bitwise_conversion.cpp | 0 .../backend/glasm/emit_glasm_composite.cpp | 0 .../glasm/emit_glasm_context_get_set.cpp | 0 .../backend/glasm/emit_glasm_control_flow.cpp | 0 .../backend/glasm/emit_glasm_convert.cpp | 0 .../glasm/emit_glasm_floating_point.cpp | 0 .../backend/glasm/emit_glasm_image.cpp | 0 .../backend/glasm/emit_glasm_image_atomic.cpp | 0 .../backend/glasm/emit_glasm_instructions.h | 650 +++++ .../backend/glasm/emit_glasm_integer.cpp | 0 .../backend/glasm/emit_glasm_logical.cpp | 0 .../backend/glasm/emit_glasm_memory.cpp | 0 .../glasm/emit_glasm_not_implemented.cpp | 2155 +++++++++++++++++ .../backend/glasm/emit_glasm_select.cpp | 0 .../glasm/emit_glasm_shared_memory.cpp | 0 .../backend/glasm/emit_glasm_special.cpp | 0 .../backend/glasm/emit_glasm_undefined.cpp | 0 .../backend/glasm/emit_glasm_warp.cpp | 0 .../backend/glasm/reg_alloc.cpp | 82 + .../backend/glasm/reg_alloc.h | 46 + src/shader_recompiler/frontend/ir/value.h | 6 + 28 files changed, 3115 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/backend/glasm/emit_context.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_context.h create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm.h create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_image.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_instructions.h create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_select.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_special.cpp create mode
100644 src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp create mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp create mode 100644 src/shader_recompiler/backend/glasm/reg_alloc.cpp create mode 100644 src/shader_recompiler/backend/glasm/reg_alloc.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6523615aa..f829b8d32 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,5 +1,31 @@ add_library(shader_recompiler STATIC backend/bindings.h + backend/glasm/emit_context.cpp + backend/glasm/emit_context.h + backend/glasm/emit_glasm.cpp + backend/glasm/emit_glasm.h + backend/glasm/emit_glasm_atomic.cpp + backend/glasm/emit_glasm_barriers.cpp + backend/glasm/emit_glasm_bitwise_conversion.cpp + backend/glasm/emit_glasm_composite.cpp + backend/glasm/emit_glasm_context_get_set.cpp + backend/glasm/emit_glasm_control_flow.cpp + backend/glasm/emit_glasm_convert.cpp + backend/glasm/emit_glasm_floating_point.cpp + backend/glasm/emit_glasm_image.cpp + backend/glasm/emit_glasm_image_atomic.cpp + backend/glasm/emit_glasm_instructions.h + backend/glasm/emit_glasm_integer.cpp + backend/glasm/emit_glasm_logical.cpp + backend/glasm/emit_glasm_memory.cpp + backend/glasm/emit_glasm_not_implemented.cpp + backend/glasm/emit_glasm_select.cpp + backend/glasm/emit_glasm_shared_memory.cpp + backend/glasm/emit_glasm_special.cpp + backend/glasm/emit_glasm_undefined.cpp + backend/glasm/emit_glasm_warp.cpp + backend/glasm/reg_alloc.cpp + backend/glasm/reg_alloc.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -104,8 +130,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/half_floating_point_helper.cpp frontend/maxwell/translate/impl/half_floating_point_helper.h frontend/maxwell/translate/impl/half_floating_point_multiply.cpp - frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_set.cpp + frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp @@ -145,11 +171,11 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/surface_load_store.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp - frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gradient.cpp - frontend/maxwell/translate/impl/texture_load_swizzled.cpp frontend/maxwell/translate/impl/texture_load.cpp + frontend/maxwell/translate/impl/texture_load_swizzled.cpp frontend/maxwell/translate/impl/texture_mipmap_level.cpp frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/video_helper.cpp @@ -173,8 +199,8 @@ add_library(shader_recompiler STATIC ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h - program_header.h profile.h + program_header.h shader_info.h ) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp new file mode 100644 index 000000000..02c4d8a5d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -0,0 +1,5 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +//
Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" \ No newline at end of file diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h new file mode 100644 index 000000000..ae91069c8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/glasm/reg_alloc.h" + +namespace Shader::Backend::GLASM { + +class EmitContext { +public: + std::string code; + RegAlloc reg_alloc; + +private: +}; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..59d7c0f96 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -0,0 +1,95 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <tuple> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLASM { +namespace { +template <class Func> +struct FuncTraits {}; + +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; +}; + +template <typename ArgType> +auto Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v<ArgType, std::string_view>) { + return ctx.reg_alloc.Consume(arg); + } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { + return arg; + } else if constexpr (std::is_same_v<ArgType, u32>) { + return arg.U32(); + } else if constexpr (std::is_same_v<ArgType, IR::Block*>) { + return arg.Label(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { + return arg.Patch(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); + } +} + +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + } +} + +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; + using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} +} // Anonymous namespace + +std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { + EmitContext ctx; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); + } + } + return ctx.code; +} + +} // namespace Shader::Backend::GLASM
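EmitInst above never spells out how to decode each operand: the parameter list of the selected Emit* function is itself the recipe, recovered at compile time by FuncTraits and replayed over inst->Arg(i) by Invoke. A stripped-down, self-contained sketch of the same dispatch idea (Value, Arg and EmitExample here are toy stand-ins invented for this sketch, not the real IR types):

    #include <cstddef>
    #include <cstdio>
    #include <tuple>
    #include <type_traits>
    #include <utility>

    template <class Func>
    struct FuncTraits {};

    template <class R, class... Args>
    struct FuncTraits<R (*)(Args...)> {
        static constexpr std::size_t NUM_ARGS = sizeof...(Args);
        template <std::size_t I>
        using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
    };

    // Toy IR value that can be read either as an immediate or as a register.
    struct Value {
        int AsImmediate() const { return 42; }
        const char* AsRegister() const { return "R0"; }
    };

    // Decode one operand according to the type the emitter asks for.
    template <typename ArgType>
    auto Arg(const Value& value) {
        if constexpr (std::is_same_v<ArgType, int>) {
            return value.AsImmediate();
        } else {
            return value.AsRegister();
        }
    }

    // Expand the emitter's own parameter types over the operand array.
    template <auto func, std::size_t... I>
    void Invoke(const Value* args, std::index_sequence<I...>) {
        using Traits = FuncTraits<decltype(func)>;
        func(Arg<typename Traits::template ArgType<I>>(args[I])...);
    }

    void EmitExample(const char* reg, int imm) {
        std::printf("MOV.S %s, %d;\n", reg, imm);
    }

    int main() {
        const Value operands[2]{};
        constexpr std::size_t n = FuncTraits<decltype(&EmitExample)>::NUM_ARGS;
        Invoke<&EmitExample>(operands, std::make_index_sequence<n>{});
    }

The real Invoke also peels off the leading EmitContext& (and, when present, the IR::Inst*) parameters before indexing, which is why its ArgType indices are offset by one or two.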
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h new file mode 100644 index 000000000..a0dfdd818 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLASM { + +[[nodiscard]] std::string EmitGLASM(const Profile& profile, IR::Program& program, + Bindings& binding); + +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLASM(profile, program, binding); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..21d6af914 --- /dev/null +++
b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -0,0 +1,650 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string_view> + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +class EmitContext; + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, std::string_view label); +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label); +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void
EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx); +void EmitYDirection(EmitContext& ctx); +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void 
EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void 
EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx); +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); 
+void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, std::string_view value); +void EmitFPSin(EmitContext& ctx, std::string_view value); +void EmitFPCos(EmitContext& ctx, std::string_view value); +void EmitFPExp2(EmitContext& ctx, std::string_view value); +void EmitFPLog2(EmitContext& ctx, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, 
std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, std::string_view value); +void EmitINeg64(EmitContext& ctx, std::string_view value); +void EmitIAbs32(EmitContext& ctx, std::string_view value); +void EmitIAbs64(EmitContext& ctx, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view 
base, std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, std::string_view value); +void EmitBitCount32(EmitContext& ctx, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, std::string_view value); +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void 
EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, 
const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, std::string_view 
value); +void EmitConvertF16S16(EmitContext& ctx, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + 
std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); 
+void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx); +void EmitVoteAll(EmitContext& ctx, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx); +void EmitSubgroupLtMask(EmitContext& ctx); +void EmitSubgroupLeMask(EmitContext& ctx); +void EmitSubgroupGtMask(EmitContext& ctx); +void EmitSubgroupGeMask(EmitContext& ctx); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..e90224e15 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -0,0 +1,2155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLASM { + +static void NotImplemented() { + throw NotImplementedException("GLASM instruction"); +} + +void EmitPhi(EmitContext& ctx, IR::Inst* inst) { + NotImplemented(); +} + +void EmitVoid(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIdentity(EmitContext& ctx, const IR::Value& value) { + NotImplemented(); +} + +void EmitBranch(EmitContext& ctx, std::string_view label) { + NotImplemented(); +} + +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label) { + NotImplemented(); +} + +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, + std::string_view continue_label) { + NotImplemented(); +} + +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { + NotImplemented(); +} + +void EmitReturn(EmitContext& ctx) { + NotImplemented(); +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUnreachable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { + NotImplemented(); +} + +void EmitBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPrologue(EmitContext& ctx) { + NotImplemented(); +} + +void EmitEpilogue(EmitContext& ctx) { + NotImplemented(); +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, +
std::string_view vertex) { + NotImplemented(); +} + +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + NotImplemented(); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + NotImplemented(); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLocalInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSampleId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIsHelperInvocation(EmitContext& ctx) { + NotImplemented(); +} + +void EmitYDirection(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { + NotImplemented(); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + NotImplemented(); +} + +void EmitUndefU1(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitWriteGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& 
offset) { + NotImplemented(); +} + +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 
index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4(EmitContext& ctx) { + 
NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitBitCastU16F16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastU64F64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF16U16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastF64U64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitFPAbs16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void 
EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPNeg16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSin(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCos(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPExp2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPLog2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven64(EmitContext& ctx, 
std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void 
EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitINeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitINeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) 
{ + NotImplemented(); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitReverse32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCount32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { 
+ NotImplemented(); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void 
EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void 
EmitGlobalAtomicMinF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalNot(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32(EmitContext& ctx, std::string_view 
value) { + NotImplemented(); +} + +void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + 
const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + NotImplemented(); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + NotImplemented(); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms) { + NotImplemented(); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod) { + NotImplemented(); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp) { + NotImplemented(); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void 
EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitLaneId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitVoteAll(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteAny(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupEqMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + 
NotImplemented(); +} + +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + NotImplemented(); +} + +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..0460a394b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +constexpr std::string_view SWIZZLE = "xyzw"; + +std::string Representation(Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spilling"); + } + const u32 num_elements{id.num_elements_minus_one + 1}; + const u32 index{static_cast<u32>(id.index)}; + if (num_elements == 4) { + return fmt::format("R{}", index); + } else { + return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); + } +} +} // Anonymous namespace + +std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { + const Id id{Alloc(num_elements, alignment)}; + inst.SetDefinition(id); + return Representation(id); +} + +std::string RegAlloc::Consume(const IR::Value& value) { + if (!value.IsImmediate()) { + return Consume(*value.Inst()); + } + throw NotImplementedException("Immediate loading"); +} + +std::string RegAlloc::Consume(IR::Inst& inst) { + const Id id{inst.Definition<Id>()}; + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(id); + } + return Representation(inst.Definition<Id>()); +} + +Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (register_use[reg]) { + continue; + } + num_used_registers = std::max(num_used_registers, reg + 1); + register_use[reg] = true; + return Id{ + .base_element = 0, + .num_elements_minus_one = num_elements - 1, + .index = static_cast<u32>(reg), + .is_spill = 0, + .is_condition_code = 0, + }; + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + register_use[id.index] = false; +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..46018b0c2 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
+ +#pragma once + +#include <bitset> + +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLASM { + +struct Id { + u32 base_element : 2; + u32 num_elements_minus_one : 2; + u32 index : 26; + u32 is_spill : 1; + u32 is_condition_code : 1; +}; + +class RegAlloc { +public: + std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + + std::string Consume(const IR::Value& value); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + std::string Consume(IR::Inst& inst); + + Id Alloc(u32 num_elements, u32 alignment); + + void Free(Id id); + + size_t num_used_registers{}; + std::bitset<NUM_REGS> register_use{}; +}; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index bb7d19001..c73851d11 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -218,6 +218,12 @@ public: return Common::BitCast<DefinitionType>(definition); } + /// Destructively remove one reference count from the instruction + /// Useful for register allocation + void DestructiveRemoveUsage() { + --use_count; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} From a51503660435f1279ce0fa449f9cf76e74b45d74 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:08 -0300 Subject: [PATCH 334/770] vk_master_semaphore: Use fetch_add to increase master semaphore tick --- src/video_core/renderer_vulkan/vk_master_semaphore.h | 6 +++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick.
- void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81cb330d9..fcb6a5911 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -168,9 +168,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - const u64 signal_value = master_semaphore->CurrentTick(); - master_semaphore->NextTick(); - + const u64 signal_value = master_semaphore->NextTick(); Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); From 56c47951c5d92d5e6145060469528301c67e0754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:37 -0300 Subject: [PATCH 335/770] vk_query_cache: Wait before reading queries --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 1dd78328c..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,17 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - auto& scheduler{cache.GetScheduler()}; - if (tick >= scheduler.CurrentTick()) { - scheduler.Flush(); - // This may not be necessary, but it's better to play it safe and assume drivers don't - // support wait before signal on vkGetQueryPoolResults - scheduler.WaitWorker(); - } + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: From 36f158626726f940d9dba22a2b03ebbb5aa41c5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 06:26:12 -0300 Subject: [PATCH 336/770] vk_scheduler: Use locks instead of an SPSC queue This tries to fix a data race where we'd wait forever for the GPU.
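The race, roughly: the worker evaluated its wake-up predicate against the lock-free SPSC queue, so a push plus notify from the producer could land between the emptiness check and the actual sleep; the notification was lost and the worker (and anyone blocked in WaitWorker) slept forever. Moving to a std::queue guarded by work_mutex makes the predicate check and the sleep atomic with respect to the producer. A minimal sketch of the locked-queue shape (WorkQueue and its int payload are illustrative stand-ins, not the scheduler's real types):

    #include <condition_variable>
    #include <mutex>
    #include <queue>

    class WorkQueue {
    public:
        void Push(int item) {
            {
                std::lock_guard lock{mutex};
                queue.push(item);
            }
            cv.notify_one(); // wake one sleeping consumer
        }

        // Blocks until an item arrives; returns false once Quit() is called.
        bool Pop(int& item) {
            std::unique_lock lock{mutex};
            // The predicate runs with the mutex held, so it cannot interleave
            // with Push or Quit; a notify cannot be lost before the wait.
            cv.wait(lock, [this] { return !queue.empty() || quit; });
            if (quit) {
                return false;
            }
            item = queue.front();
            queue.pop();
            return true;
        }

        void Quit() {
            {
                std::lock_guard lock{mutex};
                quit = true;
            }
            cv.notify_all(); // wake every consumer so they can observe quit
        }

    private:
        std::queue<int> queue;
        std::mutex mutex;
        std::condition_variable cv;
        bool quit = false;
    };

The same discipline shows up below: quit becomes an atomic flag that is still only written under work_mutex, and a second condition variable (wait_cv) lets WaitWorker sleep until the worker reports an empty queue instead of spinning.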
--- .../renderer_vulkan/vk_scheduler.cpp | 58 +++++++++++-------- src/video_core/renderer_vulkan/vk_scheduler.h | 16 ++--- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fcb6a5911..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -47,8 +47,11 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -69,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -135,22 +137,27 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { void VKScheduler::WorkerThread() { Common::SetCurrentThreadName("yuzu:VulkanWorker"); - std::unique_lock lock{mutex}; do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); } - while (!chunk_queue.Empty()) { - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - const bool has_submit = extracted_chunk->HasSubmit(); - extracted_chunk->ExecuteAll(current_cmdbuf); - if (has_submit) { - AllocateWorkerCommandBuffer(); + std::unique_ptr<CommandChunk> work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; } - chunk_reserve.Push(std::move(extracted_chunk)); + work = std::move(work_queue.front()); + work_queue.pop(); } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); + } + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } @@ -269,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique<CommandChunk>(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 40215c4c5..6600fb142 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,14 +6,14 @@ #include #include +#include #include #include -#include #include #include + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -220,11 +220,13 @@ private: std::array renderpass_images{}; std::array renderpass_image_ranges{}; - Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; -
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue<std::unique_ptr<CommandChunk>> work_queue; + std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan From c1ba685d9c9b9ca9e8c479c52097adf943e804eb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 06:31:30 -0300 Subject: [PATCH 337/770] glasm: Changes to GLASM register allocator and emit context --- .../backend/glasm/emit_context.cpp | 8 +++- .../backend/glasm/emit_context.h | 27 ++++++++++++-- .../backend/glasm/reg_alloc.cpp | 37 +++++++++++-------- .../backend/glasm/reg_alloc.h | 18 ++++++--- 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 02c4d8a5d..b4db4ff8f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -2,6 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once +#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_context.h" \ No newline at end of file +namespace Shader::Backend::GLASM { + +EmitContext::EmitContext() = default; + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index ae91069c8..cf66619de 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -5,17 +5,38 @@ #pragma once #include <string> +#include <utility> + +#include <fmt/format.h> #include "shader_recompiler/backend/glasm/reg_alloc.h" +namespace Shader::IR { +class Inst; +} + namespace Shader::Backend::GLASM { class EmitContext { public: - std::string code; - RegAlloc reg_alloc; + explicit EmitContext(); -private: + template <typename... Args> + void Add(const char* fmt, IR::Inst& inst, Args&&... args) { + code += fmt::format(fmt, reg_alloc.Define(inst), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void Add(const char* fmt, Args&&... args) { + code += fmt::format(fmt, std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string code; + RegAlloc reg_alloc{*this}; }; } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 0460a394b..55e8107e9 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -3,18 +3,16 @@ // Refer to the license.txt file included.
#include <string> -#include <string_view> #include <fmt/format.h> +#include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/reg_alloc.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { namespace { -constexpr std::string_view SWIZZLE = "xyzw"; - std::string Representation(Id id) { if (id.is_condition_code != 0) { throw NotImplementedException("Condition code"); @@ -22,27 +20,36 @@ std::string Representation(Id id) { if (id.is_spill != 0) { throw NotImplementedException("Spilling"); } - const u32 num_elements{id.num_elements_minus_one + 1}; const u32 index{static_cast<u32>(id.index)}; - if (num_elements == 4) { - return fmt::format("R{}", index); - } else { - return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); + return fmt::format("R{}.x", index); +} + +std::string ImmValue(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return value.U1() ? "-1" : "0"; + case IR::Type::U32: + return fmt::format("{}", value.U32()); + case IR::Type::F32: + return fmt::format("{}", value.F32()); + default: + throw NotImplementedException("Immediate type", value.Type()); } } } // Anonymous namespace -std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { - const Id id{Alloc(num_elements, alignment)}; +std::string RegAlloc::Define(IR::Inst& inst) { + const Id id{Alloc()}; inst.SetDefinition(id); return Representation(id); } std::string RegAlloc::Consume(const IR::Value& value) { - if (!value.IsImmediate()) { - return Consume(*value.Inst()); + if (value.IsImmediate()) { + return ImmValue(value); + } else { + return Consume(*value.InstRecursive()); } - throw NotImplementedException("Immediate loading"); } std::string RegAlloc::Consume(IR::Inst& inst) { @@ -54,7 +61,7 @@ std::string RegAlloc::Consume(IR::Inst& inst) { return Representation(inst.Definition<Id>()); } -Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { +Id RegAlloc::Alloc() { for (size_t reg = 0; reg < NUM_REGS; ++reg) { if (register_use[reg]) { continue; @@ -62,8 +69,6 @@ Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { num_used_registers = std::max(num_used_registers, reg + 1); register_use[reg] = true; return Id{ - .base_element = 0, - .num_elements_minus_one = num_elements - 1, .index = static_cast<u32>(reg), .is_spill = 0, .is_condition_code = 0, diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 46018b0c2..83d728d20 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -15,27 +15,35 @@ class Value; namespace Shader::Backend::GLASM { +class EmitContext; + struct Id { - u32 base_element : 2; - u32 num_elements_minus_one : 2; - u32 index : 26; + u32 index : 30; u32 is_spill : 1; u32 is_condition_code : 1; }; class RegAlloc { public: - std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + RegAlloc(EmitContext& ctx_) : ctx{ctx_} {} + + std::string Define(IR::Inst& inst); std::string Consume(const IR::Value& value); + [[nodiscard]] size_t NumUsedRegisters() const noexcept { + return num_used_registers; + } + private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; + EmitContext& ctx; + std::string Consume(IR::Inst& inst); - Id Alloc(u32 num_elements, u32 alignment); + Id Alloc(); void Free(Id id); From 6fd190d1ae4275a06ed2e488401e1d63912954be Mon Sep 17 00:00:00 2001 From:
ReinUsesLisp Date: Sat, 8 May 2021 16:28:52 -0300 Subject: [PATCH 338/770] glasm: Implement basic GLASM instructions --- .../backend/glasm/emit_context.cpp | 21 +- .../backend/glasm/emit_context.h | 5 +- .../backend/glasm/emit_glasm.cpp | 66 +- .../glasm/emit_glasm_context_get_set.cpp | 125 +++ .../glasm/emit_glasm_floating_point.cpp | 421 ++++++++++ .../backend/glasm/emit_glasm_instructions.h | 177 ++-- .../backend/glasm/emit_glasm_integer.cpp | 228 +++++ .../backend/glasm/emit_glasm_memory.cpp | 178 ++++ .../glasm/emit_glasm_not_implemented.cpp | 786 +----------------- src/shader_recompiler/frontend/ir/value.h | 6 + 10 files changed, 1173 insertions(+), 840 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index b4db4ff8f..9f839f3bf 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -3,9 +3,28 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::GLASM { -EmitContext::EmitContext() = default; +EmitContext::EmitContext(IR::Program& program) { + // FIXME: Temporary partial implementation + u32 cbuf_index{}; + for (const auto& desc : program.info.constant_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Constant buffer descriptor array"); + } + Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); + ++cbuf_index; + } + for (const auto& desc : program.info.storage_buffers_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Storage buffer descriptor array"); + } + } + if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } +} } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index cf66619de..4f98a9816 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -13,13 +13,14 @@ namespace Shader::IR { class Inst; -} +struct Program; +} // namespace Shader::IR namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(); + explicit EmitContext(IR::Program& program); template <typename... Args> void Add(const char* fmt, IR::Inst& inst, Args&&...
args) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 59d7c0f96..65600f58c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -50,7 +50,7 @@ template <auto func, bool is_first_arg_inst, size_t... I> void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { using Traits = FuncTraits<decltype(func)>; if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); } @@ -64,7 +64,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; - static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; + static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); } @@ -80,16 +80,76 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { } throw LogicError("Invalid opcode {}", inst->GetOpcode()); } + +void Identity(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst* const value_inst{value.InstRecursive()}; + if (inst.GetOpcode() == IR::Opcode::Identity) { + value_inst->DestructiveAddUsage(inst.UseCount()); + value_inst->DestructiveRemoveUsage(); + } + inst.SetDefinition(value_inst->Definition<Id>()); +} } // Anonymous namespace std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { - EmitContext ctx; + EmitContext ctx{program}; for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { EmitInst(ctx, &inst); } } + std::string header = "!!NVcp5.0\n" + "OPTION NV_internal;"; + switch (program.stage) { + case Stage::Compute: + header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], + program.workgroup_size[1], program.workgroup_size[2]); + break; + default: + break; + } + header += "TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { + header += fmt::format("R{},", index); + } + header += "RC;"; + if (!program.info.storage_buffers_descriptors.empty()) { + header += "LONG TEMP LC;"; + } + ctx.code.insert(0, header); + ctx.code += "END"; return ctx.code; } +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + Identity(inst, value); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..72733d1cf --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or
any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view size) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Indirect constant buffer loading"); + } + const std::string ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), ctx.reg_alloc.Consume(offset)); +} +} // Anonymous namespace + +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "U8"); +} + +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "S8"); +} + +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "U16"); +} + +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "S16"); +} + +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "U32"); +} + +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "F32"); +} + +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + GetCbuf(ctx, inst, binding, offset, "U32X2"); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + [[maybe_unused]] std::string_view vertex) { + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const u32 element{IR::GenericAttributeElement(attr)}; + ctx.Add("MOV.F {},in_attr{}.{};", inst, index, "xyzw"[element]); + return; + } + throw NotImplementedException("Get attribute {}", attr); +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("MOV.F out_attr{}.{},{};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F result.position.{},{};", swizzle, value); + break; + default: + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitGetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view vertex) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch patch) { + throw NotImplementedException("GLASM instruction"); +} + +void
EmitSetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch patch, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSetFragColor([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] u32 index, + [[maybe_unused]] u32 component, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSetSampleMask([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSetFragDepth([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index e69de29bb..db9dda261 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -0,0 +1,421 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.Add("MOV.F {},|{}|;", inst, value); +} + +void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.Add("ADD.F {},{},{};", inst, a, b); +} + +void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { + ctx.Add("MAD.F {},{},{},{};", inst, a, b, c); +} + +void EmitFPFma64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM
instruction"); +} + +void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.Add("MUL.F {},{},{};", inst, a, b); +} + +void EmitFPMul64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + if (value[0] == '-') { + // Guard against negating a negative immediate + ctx.Add("MOV.F {},{};", inst, value.substr(1)); + } else { + ctx.Add("MOV.F {},-{};", inst, value); + } +} + +void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.Add("MOV.F.SAT {},{};", inst, value); +} + +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view 
min_value, + [[maybe_unused]] std::string_view max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + const std::string ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SEQ.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +} + +void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void 
EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + const std::string ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SLT.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +} + +void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& 
ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + const std::string ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SLE.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +} + +void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 21d6af914..30cc6c2eb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -20,9 +20,9 @@ namespace Shader::Backend::GLASM { class EmitContext; // Microinstruction emitters -void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); -void EmitIdentity(EmitContext& ctx, const 
IR::Value& value); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBranch(EmitContext& ctx, std::string_view label); void EmitBranchConditional(EmitContext& ctx, std::string_view condition, std::string_view true_label, std::string_view false_label); @@ -47,14 +47,22 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); @@ -100,26 +108,33 @@ void EmitWriteGlobalS16(EmitContext& ctx); void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorageS8(EmitContext& 
ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, std::string_view offset, std::string_view value); void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); @@ -203,12 +218,12 @@ void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view tru std::string_view false_value); void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitBitCastU16F16(EmitContext& ctx); -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); -void EmitBitCastF64U64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitPackUint2x32(EmitContext& ctx, std::string_view value); void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); @@ -224,26 +239,26 @@ void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void 
EmitGetInBoundsFromOp(EmitContext& ctx); void EmitFPAbs16(EmitContext& ctx, std::string_view value); -void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPAbs64(EmitContext& ctx, std::string_view value); -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c); -void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c); -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c); void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPNeg16(EmitContext& ctx, std::string_view value); -void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPNeg64(EmitContext& ctx, std::string_view value); void EmitFPSin(EmitContext& ctx, std::string_view value); void EmitFPCos(EmitContext& ctx, std::string_view value); @@ -255,7 +270,7 @@ void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); void EmitFPSqrt(EmitContext& ctx, std::string_view value); void EmitFPSaturate16(EmitContext& ctx, std::string_view value); -void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPSaturate64(EmitContext& ctx, std::string_view value); void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, std::string_view max_value); @@ -276,7 +291,7 @@ void EmitFPTrunc16(EmitContext& ctx, std::string_view value); void EmitFPTrunc32(EmitContext& ctx, std::string_view value); void EmitFPTrunc64(EmitContext& ctx, std::string_view value); void 
EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); @@ -288,7 +303,8 @@ void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_v void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); @@ -300,7 +316,8 @@ void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::strin void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); @@ -314,7 +331,7 @@ void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std:: void EmitFPIsNan16(EmitContext& ctx, std::string_view value); void EmitFPIsNan32(EmitContext& ctx, std::string_view value); void EmitFPIsNan64(EmitContext& ctx, std::string_view value); -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); @@ -329,14 +346,14 @@ void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::strin void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitBitwiseXor32(EmitContext& ctx, 
IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, std::string_view offset, std::string_view count); -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count); -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count); void EmitBitReverse32(EmitContext& ctx, std::string_view value); void EmitBitCount32(EmitContext& ctx, std::string_view value); @@ -347,9 +364,9 @@ void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max); -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max); void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); @@ -544,36 +561,36 @@ void EmitBoundImageQueryLod(EmitContext&); void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view bias_lc, const IR::Value& offset); -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view lod_lc, const IR::Value& offset); -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view bias_lc, const IR::Value& offset); -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view lod_lc, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2); -void EmitImageGatherDref(EmitContext& ctx, IR::Inst* 
inst, const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2, std::string_view dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms); -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view lod); -void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view derivates, std::string_view offset, std::string_view lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); @@ -597,27 +614,27 @@ void EmitBoundImageAtomicAnd32(EmitContext&); void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 
std::string_view coords, std::string_view value); -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); void EmitLaneId(EmitContext& ctx); void EmitVoteAll(EmitContext& ctx, std::string_view pred); @@ -629,15 +646,15 @@ void EmitSubgroupLtMask(EmitContext& ctx); void EmitSubgroupLeMask(EmitContext& ctx); void EmitSubgroupGtMask(EmitContext& ctx); void EmitSubgroupGeMask(EmitContext& ctx); -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index e69de29bb..e228fa072 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -0,0 +1,228 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
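+
+// Reference semantics for the bit-field operations stubbed below, as a sketch
+// under the usual assumptions (0 < count and offset + count <= 32); this is
+// not the lowering that eventually lands:
+//
+//   u32 BitFieldUExtract(u32 base, u32 offset, u32 count) {
+//       return (base >> offset) & ((1u << count) - 1u); // mask out the field
+//   }
+//   s32 BitFieldSExtract(u32 base, u32 offset, u32 count) {
+//       // Move the field's top bit up to bit 31, then arithmetic-shift back.
+//       return static_cast<s32>(base << (32 - offset - count)) >> (32 - count);
+//   }
+//   u32 BitFieldInsert(u32 base, u32 insert, u32 offset, u32 count) {
+//       const u32 mask = ((1u << count) - 1u) << offset;
+//       return (base & ~mask) | ((insert << offset) & mask); // splice field in
+//   }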
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a,
+                [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a,
+                [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a,
+                [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a,
+                [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx,
+                            [[maybe_unused]] std::string_view base,
+                            [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx,
+                            [[maybe_unused]] std::string_view base,
+                            [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx,
+                             [[maybe_unused]] std::string_view base,
+                             [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx,
+                             [[maybe_unused]] std::string_view base,
+                             [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx,
+                                [[maybe_unused]] std::string_view base,
+                                [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx,
+                                [[maybe_unused]] std::string_view base,
+                                [[maybe_unused]] std::string_view shift) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                      [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                     [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
+    throw NotImplementedException("GLASM instruction");
+}
+
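+// All of the stubs above throw until each instruction gets a real lowering. A
+// sketch of the expected shape of a value-producing emitter, assuming results
+// are bound through the register allocator's Define() (illustrative only, not
+// the implementation that eventually lands; "ADD.S" stands in for whatever
+// GLASM opcode the op maps to):
+//
+//   void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a,
+//                   std::string_view b) {
+//       // Define() picks the destination register for this IR instruction.
+//       ctx.Add("ADD.S {},{},{};", ctx.reg_alloc.Define(inst), a, b);
+//   }
+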
+void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view insert, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, + [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, + [[maybe_unused]] std::string_view max) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, + [[maybe_unused]] std::string_view max) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM 
instruction"); +} + +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index e69de29bb..9e38a1bdf 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void StorageOp(EmitContext& ctx, const IR::Value& binding, std::string_view offset,
+               std::string_view then_expr, std::string_view else_expr = {}) {
+    // Operate on bindless SSBO, call the expression with bounds checking
+    // address = c[binding].xy
+    // length = c[binding].z
+    const u32 sb_binding{binding.U32()};
+    ctx.Add("PK64.U LC,c[{}];"           // pointer = address
+            "CVT.U64.U32 LC.z,{};"       // offset = uint64_t(offset)
+            "ADD.U64 LC.x,LC.x,LC.z;"    // pointer += offset
+            "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
+            sb_binding, offset, offset, sb_binding);
+    if (else_expr.empty()) {
+        ctx.Add("{}", then_expr);
+    } else {
+        ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
+    }
+}
+
+void Store(EmitContext& ctx, const IR::Value& binding, std::string_view offset,
+           std::string_view value, std::string_view size) {
+    StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},LC.x;", size, value));
+}
+
+void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, std::string_view offset,
+          std::string_view size) {
+    const std::string ret{ctx.reg_alloc.Define(inst)};
+    StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},LC.x;", size, ret),
+              fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+}
+} // Anonymous namespace
+
+void EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobalS8([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobalU16([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobal32([[maybe_unused]] EmitContext& ctx,
+                      [[maybe_unused]] std::string_view address) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobal64([[maybe_unused]] EmitContext& ctx,
+                      [[maybe_unused]] std::string_view address) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadGlobal128([[maybe_unused]] EmitContext& ctx,
+                       [[maybe_unused]] std::string_view address) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobalU8([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobalS8([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobalU16([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view address,
+                       [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view address,
+                       [[maybe_unused]] std::string_view value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteGlobal128([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] std::string_view address,
+                        [[maybe_unused]]
std::string_view value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "U8"); +} + +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "S8"); +} + +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "U16"); +} + +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "S16"); +} + +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "U32"); +} + +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "U32X2"); +} + +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + std::string_view offset) { + Load(ctx, inst, binding, offset, "U32X4"); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "U8"); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "S8"); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "U16"); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "S16"); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "U32"); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "U32X2"); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, std::string_view offset, + std::string_view value) { + Store(ctx, binding, offset, value, "U32X4"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index e90224e15..1337f4ae8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -15,11 +15,9 @@ namespace Shader::Backend::GLASM { -static void NotImplemented() { - throw NotImplementedException("GLASM instruction"); -} +#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) -void EmitPhi(EmitContext& ctx, IR::Inst* inst) { +void EmitPhi(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } @@ -27,10 +25,6 @@ void EmitVoid(EmitContext& ctx) { NotImplemented(); } -void EmitIdentity(EmitContext& ctx, const IR::Value& value) { - NotImplemented(); -} - void EmitBranch(EmitContext& ctx, std::string_view label) { NotImplemented(); } @@ -50,7 +44,7 @@ void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { } void EmitReturn(EmitContext& ctx) { - NotImplemented(); + 
ctx.Add("RET;"); } void EmitJoin(EmitContext& ctx) { @@ -78,11 +72,11 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { } void EmitPrologue(EmitContext& ctx) { - NotImplemented(); + // TODO } void EmitEpilogue(EmitContext& ctx) { - NotImplemented(); + // TODO } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { @@ -125,72 +119,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - -void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - -void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { - NotImplemented(); -} - -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { - NotImplemented(); -} - -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { - NotImplemented(); -} - -void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } @@ -275,125 +203,6 @@ void EmitUndefU64(EmitContext& ctx) { NotImplemented(); } -void EmitLoadGlobalU8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalS8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalU16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalS16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitWriteGlobalU8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalS8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalU16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalS16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { - NotImplemented(); -} - -void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { - NotImplemented(); -} - -void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, 
std::string_view value) { - NotImplemented(); -} - -void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - NotImplemented(); -} - -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { NotImplemented(); } @@ -644,30 +453,6 @@ void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view tru NotImplemented(); } -void EmitBitCastU16F16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitCastU64F64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastF16U16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitCastF64U64(EmitContext& ctx) { - NotImplemented(); -} - void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -724,340 +509,6 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitFPAbs16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAbs32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAbs64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { - NotImplemented(); -} - -void 
EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { - NotImplemented(); -} - -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { - NotImplemented(); -} - -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPNeg16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPNeg32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPNeg64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSin(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCos(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPExp2(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPLog2(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecip32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecip64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSqrt(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCeil16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCeil32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void 
EmitFPCeil64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual16(EmitContext& ctx, 
std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -1070,179 +521,6 @@ void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { NotImplemented(); } -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitINeg32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitINeg64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitIAbs32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitIAbs64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { - NotImplemented(); -} - -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, - std::string_view offset, std::string_view count) { - NotImplemented(); -} - -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count) { - NotImplemented(); -} 
- -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count) { - NotImplemented(); -} - -void EmitBitReverse32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitCount32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, - std::string_view max) { - NotImplemented(); -} - -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, - std::string_view max) { - NotImplemented(); -} - -void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, std::string_view value) { NotImplemented(); @@ -1858,69 +1136,69 @@ void EmitBoundImageWrite(EmitContext&) { NotImplemented(); } -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view bias_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view lod_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view bias_lc, const IR::Value& offset) { NotImplemented(); } -void 
EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view lod_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { NotImplemented(); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2, std::string_view dref) { NotImplemented(); } -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms) { NotImplemented(); } -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view lod) { NotImplemented(); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { NotImplemented(); } -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view derivates, std::string_view offset, std::string_view lod_clamp) { NotImplemented(); } -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { NotImplemented(); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color) { NotImplemented(); } @@ -2013,57 +1291,57 @@ void EmitBoundImageAtomicExchange32(EmitContext&) { NotImplemented(); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, 
std::string_view value) { NotImplemented(); } -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { NotImplemented(); } @@ -2108,24 +1386,24 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { NotImplemented(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index c73851d11..beaf149f3 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -224,6 +224,12 @@ public: --use_count; } + /// Destructively add usages to the instruction + /// Useful for register allocation + void DestructiveAddUsage(int count) { + use_count += count; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} From 2c81ad831192a8234e26a61706f18b460999c89f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:34:41 -0300 Subject: [PATCH 339/770] glasm: Initial GLASM compute implementation for testing --- .../renderer_opengl/gl_compute_program.cpp | 17 ++++++--- .../renderer_opengl/gl_compute_program.h | 7 ++-- .../renderer_opengl/gl_shader_cache.cpp | 37 
++++++++++++++++--- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index d5ef65439..fb54618a4 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -29,11 +29,11 @@ bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_) + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, - program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { for (const auto& desc : info.texture_buffer_descriptors) { num_texture_buffers += desc.count; } @@ -124,6 +124,14 @@ void ComputeProgram::Configure() { const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); + if (assembly_program.handle != 0) { + // FIXME: State track this + glEnable(GL_COMPUTE_PROGRAM_NV); + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(source_program.handle); + } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; const auto add_buffer{[&](const auto& desc) { @@ -172,7 +180,6 @@ void ComputeProgram::Configure() { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - program_manager.BindProgram(program.handle); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h index 64a75d44d..ddb00dc1d 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -52,8 +52,8 @@ public: explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_); + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); @@ -64,8 +64,9 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; ProgramManager& program_manager; - OGLProgram program; Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9bbdfeb62..d9f0bca78 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include 
"shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -89,6 +90,7 @@ const Shader::Profile profile{ .xfb_varyings = {}, }; +using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -151,6 +153,22 @@ void LinkProgram(GLuint program) { } } +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (!Settings::values.renderer_debug) { + return program; + } + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + LOG_CRITICAL(Render_OpenGL, "{}", err); + LOG_INFO(Render_OpenGL, "{}", code); + } + return program; +} + GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -294,13 +312,20 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(profile, program)}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); - LinkProgram(gl_program.handle); + OGLAssemblyProgram asm_program; + OGLProgram source_program; + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + const std::vector code{EmitSPIRV(profile, program)}; + source_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, std::move(gl_program), program.info); + program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL From dc02cb92e43d2ef05197e4edb2573116d7ae58c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:36:51 -0300 Subject: [PATCH 340/770] gl_rasterizer: Flush L2 caches before glFlush on GLASM --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e527b76ba..4834d58f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -473,6 +473,14 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used + // and prefer using NV_shader_storage_buffer_object when possible + if (Settings::values.use_assembly_shaders.GetValue()) { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } From 2b04b4d27fc38d9865cef5bf8eabb335bc29eb83 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:43:26 -0300 Subject: [PATCH 341/770] glasm: Remove unused argument in identity instructions on GLASM --- 
src/shader_recompiler/backend/glasm/emit_glasm.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 65600f58c..7ec880c81 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -124,31 +124,31 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { return ctx.code; } -void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); } -void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { +void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Identity(inst, value); }
From 3e841f6441903c6e97307dd49a2543ce82654044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 8 May 2021 16:46:32 -0300 Subject: [PATCH 342/770] glasm: Use BitField instead of C bitfields --- src/shader_recompiler/backend/glasm/reg_alloc.cpp | 10 +++++----- src/shader_recompiler/backend/glasm/reg_alloc.h | 10 +++++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 55e8107e9..010ad0275 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -68,11 +68,11 @@ Id RegAlloc::Alloc() { } num_used_registers = std::max(num_used_registers, reg + 1); register_use[reg] = true; - return Id{ - .index = static_cast<u32>(reg), - .is_spill = 0, - .is_condition_code = 0, - }; + Id ret{}; + ret.index.Assign(static_cast<u32>(reg)); + ret.is_spill.Assign(0); + ret.is_condition_code.Assign(0); + return ret; } throw NotImplementedException("Register spilling"); } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 83d728d20..f73aa3348 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -6,6 +6,7 @@ #include <bitset> +#include "common/bit_field.h" #include "common/common_types.h" namespace Shader::IR { @@ -18,9 +19,12 @@ namespace Shader::Backend::GLASM { class EmitContext; struct Id { - u32 index : 30; - u32 is_spill : 1; - u32 is_condition_code : 1; + union { + u32 raw; + BitField<0, 30, u32> index; + BitField<30, 1, u32> is_spill; + BitField<31, 1, u32> is_condition_code; + }; }; class RegAlloc {
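Note: the replaced C bit-fields had implementation-defined packing, while BitField<position, bits, type> pins each field to an explicit bit range of a single u32, and the union keeps a raw view of the whole word; fields are now written through Assign(), as the updated Alloc() shows. A small usage sketch under common/bit_field.h semantics (PackSpillId is an invented helper for illustration, not part of the patch):

// Invented helper showing the BitField-based Id in use: Assign() masks each
// value into its declared bit range of the shared 32-bit word.
u32 PackSpillId(u32 spill_index) {
    Id id{};
    id.index.Assign(spill_index);   // bits [0, 30)
    id.is_spill.Assign(1);          // bit 30
    id.is_condition_code.Assign(0); // bit 31
    return id.raw;                  // all fields share the same u32
}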
From 941c6dc740ed368edbbc00913ace73dddecd43ba Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 8 May 2021 18:50:10 -0400 Subject: [PATCH 343/770] glasm: Implement BFI, BFE Along with implementations of common instructions encountered along the way --- .../backend/glasm/emit_glasm_instructions.h | 66 +++---- .../backend/glasm/emit_glasm_integer.cpp | 174 ++++++++++-------- .../glasm/emit_glasm_not_implemented.cpp | 40 ---- .../backend/glasm/emit_glasm_select.cpp | 50 +++++ 4 files changed, 178 insertions(+), 152 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 30cc6c2eb..13f47b253 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -208,8 +208,8 @@ void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true std::string_view false_value); void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); @@ -332,14 +332,14 @@ void EmitFPIsNan16(EmitContext& ctx, std::string_view value); void EmitFPIsNan32(EmitContext& ctx, std::string_view value); void EmitFPIsNan64(EmitContext& ctx, std::string_view value); void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitINeg32(EmitContext& ctx, std::string_view value); -void EmitINeg64(EmitContext& ctx, std::string_view value); -void EmitIAbs32(EmitContext& ctx, std::string_view value); -void EmitIAbs64(EmitContext& ctx, std::string_view value); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); @@ -349,35 +349,39 @@ void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::st void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst,
std::string_view a, std::string_view b); void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, - std::string_view offset, std::string_view count); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count); void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count); -void EmitBitReverse32(EmitContext& ctx, std::string_view value); -void EmitBitCount32(EmitContext& ctx, std::string_view value); -void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); -void EmitFindSMsb32(EmitContext& ctx, std::string_view value); -void EmitFindUMsb32(EmitContext& ctx, std::string_view value); -void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max); void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max); -void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, 
std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, std::string_view value); void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index e228fa072..d4e519a2a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -12,42 +12,46 @@ namespace Shader::Backend::GLASM { void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.Add("ADD {},{},{};", inst, a, b); +} + +void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.Add("SUB {},{},{};", inst, a, b); +} + +void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void 
EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } @@ -94,7 +98,7 @@ void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("OR {},{},{};", inst, a, b); } void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -102,64 +106,66 @@ void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In throw NotImplementedException("GLASM instruction"); } -void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view insert, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFI.U {},RC,{},{};", inst, insert, base); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFE.S {},RC,{};", inst, base); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFE.U {},RC,{};", inst, base); +} + +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.Add("BFR {},{};", inst, value); +} + +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] 
IR::Inst& inst, + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLASM instruction"); } -void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLASM instruction"); } @@ -175,54 +181,60 @@ void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& throw NotImplementedException("GLASM instruction"); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { + ctx.Add("SLT.S {},{},{};", inst, lhs, rhs); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { + ctx.Add("SLT.U {},{},{};", inst, lhs, rhs); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { + ctx.Add("SEQ {},{},{};", inst, lhs, rhs); } -void EmitSLessThanEqual([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SLE.S {},{},{};", inst, lhs, rhs); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SLE.U {},{},{};", inst, lhs, rhs); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SGT.S {},{},{};", inst, lhs, rhs); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { + ctx.Add("SGT.U {},{},{};", inst, lhs, rhs); +} + +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SGE.S {},{},{};", inst, lhs, rhs); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SGE.U {},{},{};", inst, lhs, rhs); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 1337f4ae8..32eb87837 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -413,46 +413,6 @@ void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std: NotImplemented(); } -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU32(EmitContext& 
ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index e69de29bb..636cbe8a0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitSelectU1(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU8(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU16(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.Add("MOV.U.CC RC,{};", cond); + ctx.Add("IF NE.x;"); + ctx.Add("MOV.U {},{};", inst, true_value); + ctx.Add("ELSE;"); + ctx.Add("MOV.U {},{};", inst, false_value); + ctx.Add("ENDIF;"); +} + +void EmitSelectU64(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF16(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF32(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF64(EmitContext&, std::string_view, std::string_view, std::string_view) { + throw NotImplementedException("GLASM instruction"); +} +} // namespace Shader::Backend::GLASM
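Note: the EmitSelectU32 added above routes the select through the condition-code register; MOV.U.CC both moves the condition and updates CC, and the IF NE.x block executes only when the x component compared non-equal to zero. [PATCH 345/770] later in this series collapses the whole sequence into a single CMP.S, which under the NV_gpu_program CMP semantics picks the second operand when the first is negative; that works because this backend's integer set-on-compare instructions follow the NV convention of writing -1 for true and 0 for false. Illustrative GLASM text for both lowerings of the same IR select (register numbers invented):

# Condition-code lowering (this patch):
MOV.U.CC RC,R2.x;   # copy the condition, updating the condition code
IF NE.x;            # taken when the condition was non-zero
MOV.U R0.x,R3.x;    # true value
ELSE;
MOV.U R0.x,R4.x;    # false value
ENDIF;
# Single-instruction lowering (patch 345 below):
CMP.S R0.x,R2.x,R3.x,R4.x;  # R0.x = (R2.x < 0) ? R3.x : R4.x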
From 68cc445b8ef7c7057087e2b6c7015d888a80bac2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 8 May 2021 18:59:05 -0400 Subject: [PATCH 344/770] glasm: Implement more logical ops --- .../backend/glasm/emit_glasm_integer.cpp | 8 ++++---- src/shader_recompiler/backend/glasm/reg_alloc.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index d4e519a2a..1289d950f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -93,7 +93,7 @@ void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("AND {},{},{};", inst, a, b); } void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -103,7 +103,7 @@ void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("XOR {},{},{};", inst, a, b); } void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, @@ -136,7 +136,7 @@ void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("NOT {},{};", inst, value); } void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -222,7 +222,7 @@ void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLASM instruction"); + ctx.Add("SNE.U {},{},{};", inst, lhs, rhs); } void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 010ad0275..e198dd522 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -33,7 +33,7 @@ std::string ImmValue(const IR::Value& value) { case IR::Type::F32: return fmt::format("{}", value.F32()); default: - throw NotImplementedException("Immediate type", value.Type()); + throw NotImplementedException("Immediate type {}", value.Type()); } } } // Anonymous namespace
From 934d3002462e27bcc289c1edb4959896cb23beb0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 8 May 2021 19:21:32 -0400 Subject: [PATCH 345/770] glasm: Use CMP.S for Select32 also fixes ADD and SUB to use U modifier --- .../backend/glasm/emit_glasm_instructions.h | 4 ++-- .../backend/glasm/emit_glasm_integer.cpp | 4 ++-- .../backend/glasm/emit_glasm_select.cpp | 12 ++++-------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 13f47b253..222285021 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -214,8 +214,8 @@ void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view tru std::string_view false_value); void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void
EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 1289d950f..579806c38 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -12,7 +12,7 @@ namespace Shader::Backend::GLASM { void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("ADD {},{},{};", inst, a, b); + ctx.Add("ADD.U {},{},{};", inst, a, b); } void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -22,7 +22,7 @@ void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("SUB {},{},{};", inst, a, b); + ctx.Add("SUB.U {},{},{};", inst, a, b); } void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index 636cbe8a0..16f6c33f3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -24,12 +24,7 @@ void EmitSelectU16(EmitContext&, std::string_view, std::string_view, std::string void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.Add("MOV.U.CC RC,{};", cond); - ctx.Add("IF NE.x;"); - ctx.Add("MOV.U {},{};", inst, true_value); - ctx.Add("ELSE;"); - ctx.Add("MOV.U {},{};", inst, false_value); - ctx.Add("ENDIF;"); + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); } void EmitSelectU64(EmitContext&, std::string_view, std::string_view, std::string_view) { @@ -40,8 +35,9 @@ void EmitSelectF16(EmitContext&, std::string_view, std::string_view, std::string throw NotImplementedException("GLASM instruction"); } -void EmitSelectF32(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); } void EmitSelectF64(EmitContext&, std::string_view, std::string_view, std::string_view) {
From 1c9307969c4e3f6206947f1360acae33f95a4a86 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 9 May 2021 03:11:34 -0300 Subject: [PATCH 346/770] glasm: Make GLASM aware of types --- .../backend/glasm/emit_context.h | 8 +- .../backend/glasm/emit_glasm.cpp | 95 +- .../backend/glasm/emit_glasm_composite.cpp | 225 +++++ .../glasm/emit_glasm_context_get_set.cpp | 53 +- .../glasm/emit_glasm_floating_point.cpp | 299 +++--- .../backend/glasm/emit_glasm_instructions.h | 844 ++++++++---------- .../backend/glasm/emit_glasm_integer.cpp | 244 +++-- .../backend/glasm/emit_glasm_memory.cpp | 77 +- .../glasm/emit_glasm_not_implemented.cpp | 515 ++++------- .../backend/glasm/emit_glasm_select.cpp | 46 - .../backend/glasm/reg_alloc.cpp | 66 +- .../backend/glasm/reg_alloc.h | 184 +++- 12 files changed, 1396 insertions(+), 1260 deletions(-)
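Note on the patch below: until now every GLASM emitter received std::string_view operands, so a register name and an immediate were indistinguishable once rendered to text, and nothing encoded how an operand should be interpreted. This patch introduces a small operand model in reg_alloc.h, a tagged Value plus Register/ScalarRegister and scalar views (ScalarU32/ScalarS32/ScalarF32 in the hunks below), so signatures state what they consume and immediates can be folded directly into the instruction string. A reduced sketch of the tagged-value idea (simplified; u32/s32/f32 are yuzu's fixed-width aliases from common/common_types.h, and the real definitions live in this patch's reg_alloc.h):

// Reduced sketch: an operand is either a live register or an immediate that
// the emitter may inline as text instead of burning a temporary register.
enum class Type : u32 { Register, U32, S32, F32 };

struct Value {
    Type type;
    union {
        u32 index;   // allocated register (an Id in the real code)
        u32 imm_u32; // immediate payloads, one per scalar type
        s32 imm_s32;
        f32 imm_f32;
    };
};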
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 4f98a9816..a59acbf6c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -23,15 +23,15 @@ public: explicit EmitContext(IR::Program& program); template <typename... Args> - void Add(const char* fmt, IR::Inst& inst, Args&&... args) { - code += fmt::format(fmt, reg_alloc.Define(inst), std::forward<Args>(args)...); + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(format_str, reg_alloc.Define(inst), std::forward<Args>(args)...); // TODO: Remove this code += '\n'; } template <typename... Args> - void Add(const char* fmt, Args&&... args) { - code += fmt::format(fmt, std::forward<Args>(args)...); + void Add(const char* format_str, Args&&... args) { + code += fmt::format(format_str, std::forward<Args>(args)...); // TODO: Remove this code += '\n'; }
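Note for the emit_glasm.cpp hunk below (the rename above from fmt to format_str presumably keeps the parameter from being confused with the fmt library namespace used in the same expression): Invoke inspects each emit function's parameter list at compile time. Operands declared as Register or ScalarRegister are routed through RegWrapper, which copies an immediate into a freshly allocated temporary (MOV.U/MOV.S/MOV.F) and frees it when the wrapper is destroyed, while Scalar*/Value parameters pass through Identity so immediates stay inline in the generated text. A hypothetical emitter illustrates the effect (EmitUMad32 is invented for illustration and assumes the fmt formatters this patch defines for the scalar operand types):

// Hypothetical emitter under the typed-operand scheme: ScalarU32 arguments can
// be immediates rendered directly into the text, e.g. "MAD.U R2.x,R0.x,4,R1.x;",
// while a Register parameter is guaranteed to name an allocated register.
void EmitUMad32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b, ScalarU32 c) {
    ctx.Add("MAD.U {}.x,{},{},{};", inst, a, b, c);
}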
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 7ec880c81..8981cf300 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -27,22 +27,80 @@ struct FuncTraits { using ArgType = std::tuple_element_t<I, std::tuple<ArgTypes...>>; }; +template <typename T> +struct Identity { + Identity(const T& data_) : data{data_} {} + + const T& Extract() { + return data; + } + + T data; +}; + +template <bool scalar> +struct RegWrapper { + RegWrapper(EmitContext& ctx, Value value) + : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { + reg = allocated ? reg_alloc.AllocReg() : Register{value}; + switch (value.type) { + case Type::Register: + break; + case Type::U32: + ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); + break; + case Type::S32: + ctx.Add("MOV.S {}.x,{};", reg, value.imm_s32); + break; + case Type::F32: + ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); + break; + } + } + ~RegWrapper() { + if (allocated) { + reg_alloc.FreeReg(reg); + } + } + + auto Extract() { + return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; + } + + RegAlloc& reg_alloc; + Register reg{}; + bool allocated{}; +}; + template <typename ArgType> auto Arg(EmitContext& ctx, const IR::Value& arg) { - if constexpr (std::is_same_v<ArgType, std::string_view>) { - return ctx.reg_alloc.Consume(arg); + if constexpr (std::is_same_v<ArgType, Register>) { + return RegWrapper<false>{ctx, ctx.reg_alloc.Consume(arg)}; + } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { + return RegWrapper<true>{ctx, ctx.reg_alloc.Consume(arg)}; + } else if constexpr (std::is_base_of_v<Value, ArgType>) { + return Identity{ArgType{ctx.reg_alloc.Consume(arg)}}; } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { - return arg; + return Identity{arg}; } else if constexpr (std::is_same_v<ArgType, u32>) { - return arg.U32(); + return Identity{arg.U32()}; } else if constexpr (std::is_same_v<ArgType, IR::Block*>) { - return arg.Label(); + return Identity{arg.Label()}; } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { - return arg.Attribute(); + return Identity{arg.Attribute()}; } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { - return arg.Patch(); + return Identity{arg.Patch()}; } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { - return arg.Reg(); + return Identity{arg.Reg()}; + } } +template <auto func, bool is_first_arg_inst, typename... Args> +void InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { + if constexpr (is_first_arg_inst) { + func(ctx, *inst, std::forward<Args>(args.Extract())...); + } else { + func(ctx, std::forward<Args>(args.Extract())...); + } } @@ -50,9 +108,10 @@ template <auto func, bool is_first_arg_inst, size_t... I> void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { using Traits = FuncTraits<decltype(func)>; if constexpr (is_first_arg_inst) { - func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); + func(ctx, *inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I)).Extract()...); } else { - func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I)).Extract()...); } } @@ -81,7 +140,7 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } -void Identity(IR::Inst& inst, const IR::Value& value) { +void Alias(IR::Inst& inst, const IR::Value& value) { if (value.IsImmediate()) { return; } @@ -125,31 +184,31 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { } void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Identity(inst, value); + Alias(inst, value); } } // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index e69de29bb..063dcaf13 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -0,0 +1,225 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { +namespace { +template <typename... Values> +void CompositeConstructU32(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (std::ranges::any_of(std::array{elements...}, + [](const IR::Value& value) { return value.IsImmediate(); })) { + const std::array<u32, 4> values{(elements.IsImmediate() ?
elements.U32() : 0)...}; + ctx.Add("MOV.U {},{{{},{},{},{}}};", ret, fmt::to_string(values[0]), + fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); + } + size_t index{}; + for (const IR::Value& element : {elements...}) { + if (!element.IsImmediate()) { + const ScalarU32 value{ctx.reg_alloc.Consume(element)}; + ctx.Add("MOV.U {}.{},{};", ret, "xyzw"[index], value); + } + ++index; + } +} + +void CompositeExtractU32(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ret == composite && index == 0) { + // No need to do anything here, the source and destination are the same register + return; + } + ctx.Add("MOV.U {}.x,{}.{};", ret, composite, "xyzw"[index]); +} +} // Anonymous namespace + +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstructU32(ctx, inst, e1, e2); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstructU32(ctx, inst, e1, e2, e3); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstructU32(ctx, inst, e1, e2, e3, e4); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtractU32(ctx, inst, composite, index); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtractU32(ctx, inst, composite, index); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtractU32(ctx, inst, composite, index); +} + +void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, + [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, + [[maybe_unused]] Register e4) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw 
NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, + [[maybe_unused]] ScalarF32 e2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, + [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, + [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3, + [[maybe_unused]] ScalarF32 e4) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, + [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void 
EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] Register composite, [[maybe_unused]] Register object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 72733d1cf..fed79e381 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -10,64 +10,58 @@ namespace Shader::Backend::GLASM { namespace { -void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { if (!binding.IsImmediate()) { throw NotImplementedException("Indirect constant buffer loading"); } - const std::string ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), ctx.reg_alloc.Consume(offset)); + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); } } // Anonymous namespace -void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "U8"); } -void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "S8"); } -void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "U16"); } -void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "S16"); } -void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "U32"); } -void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "F32"); } void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset) { + ScalarU32 offset) { GetCbuf(ctx, inst, binding, offset, "U32X2"); } void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, - [[maybe_unused]] std::string_view vertex) { + [[maybe_unused]] ScalarU32 vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; - ctx.Add("MOV.F {},in_attr{}.{};", inst, index, "xyzw"[element]); + ctx.Add("MOV.F {}.x,in_attr{}.{};", inst, index, "xyzw"[element]); return; } throw NotImplementedException("Get attribute {}", attr); } -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - [[maybe_unused]] std::string_view vertex) { +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, + [[maybe_unused]] ScalarU32 vertex) { const u32 element{static_cast<u32>(attr) % 4}; const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { @@ -87,16 +81,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } } -void EmitGetAttributeIndexed([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view vertex) { +void EmitGetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarU32 vertex) { throw NotImplementedException("GLASM instruction"); } -void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view vertex) { +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { throw NotImplementedException("GLASM instruction"); } @@ -105,20 +96,20 @@ void EmitGetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch } void EmitSetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch patch, - [[maybe_unused]] std::string_view value) { + [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } void EmitSetFragColor([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] u32 index, - [[maybe_unused]] u32 component, [[maybe_unused]] std::string_view value) { + [[maybe_unused]] u32 component, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitSetSampleMask([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitSetSampleMask([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitSetFragDepth([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitSetFragDepth([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index db9dda261..fed6503c6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -10,411 +10,382 @@ namespace Shader::Backend::GLASM { -void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { -
ctx.Add("MOV.F {},|{}|;", inst, value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F {}.x,|{}|;", inst, value); } -void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.Add("ADD.F {},{},{};", inst, a, b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("ADD.F {}.x,{},{};", inst, a, b); } void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, - [[maybe_unused]] std::string_view c) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b, + [[maybe_unused]] Register c) { throw NotImplementedException("GLASM instruction"); } -void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c) { - ctx.Add("MAD.F {},{},{},{};", inst, a, b, c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { + ctx.Add("MAD.F {}.x,{},{},{};", inst, a, b, c); } void EmitFPFma64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, - [[maybe_unused]] std::string_view c) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b, + [[maybe_unused]] Register c) { throw NotImplementedException("GLASM instruction"); } -void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, + [[maybe_unused]] ScalarF32 b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, + [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, + [[maybe_unused]] ScalarF32 b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view a, - [[maybe_unused]] std::string_view b) { +void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, + [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, 
[[maybe_unused]] std::string_view b) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.Add("MUL.F {},{},{};", inst, a, b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MUL.F {}.x,{},{};", inst, a, b); } void EmitFPMul64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + [[maybe_unused]] Register a, [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - if (value[0] == '-') { - // Guard against negating a negative immediate - ctx.Add("MOV.F {},{};", inst, value.substr(1)); - } else { - ctx.Add("MOV.F {},-{};", inst, value); - } +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { + ctx.Add("MOV.F {}.x,-{};", inst, value); } -void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] 
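
The EmitFPNeg32 change above is a direct payoff of typed operands: with string operands, value could already be a negative literal such as "-1", so unconditionally emitting "-{}" risked the invalid "--1", hence the old substr(1) guard. ScalarRegister (semantics assumed here from how it is used) guarantees the register allocator has materialized the operand into a register first, making the unconditional prefix negation always well formed:

    // Assumed semantics: unlike ScalarF32, this operand is never an inline
    // literal, so prefixing '-' cannot produce a double negation.
    struct ScalarRegister {
        std::uint32_t id{};  // always an allocated register index
    };
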
std::string_view value) { +void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.Add("MOV.F.SAT {},{};", inst, value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("MOV.F.SAT {}.x,{};", inst, value); } -void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view min_value, - [[maybe_unused]] std::string_view max_value) { +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, + [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view min_value, - [[maybe_unused]] std::string_view max_value) { +void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value, + [[maybe_unused]] ScalarF32 min_value, [[maybe_unused]] ScalarF32 max_value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view min_value, - [[maybe_unused]] std::string_view max_value) { +void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, + [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { +void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs) { - const std::string ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SEQ.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SEQ.F {}.x,{},{};SNE.S {}.x,{},0;", ret, lhs, rhs, ret, ret); } -void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw 
NotImplementedException("GLASM instruction"); } -void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs) { - const std::string ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SLT.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SLT.F {}.x,{},{};SNE.S {}.x,{}.x,0;", ret, lhs, rhs, ret, ret); } -void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] 
std::string_view rhs) { +void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs) { - const std::string ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SLE.F {},{},{};SNE.S {},{},0;", ret, lhs, rhs, ret, ret); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("SLE.F {}.x,{},{};SNE.S {}.x,{}.x,0;", ret, lhs, rhs, ret, ret); } -void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& 
ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, + [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } void EmitFPUnordGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { + [[maybe_unused]] ScalarF32 lhs, [[maybe_unused]] ScalarF32 rhs) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, + [[maybe_unused]] Register rhs) { throw NotImplementedException("GLASM instruction"); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 222285021..6db76bf46 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -4,9 +4,8 @@ #pragma once -#include <string_view> - #include "common/common_types.h" +#include 
"shader_recompiler/backend/glasm/reg_alloc.h" namespace Shader::IR { enum class Attribute : u64; @@ -23,15 +22,14 @@ class EmitContext; void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBranch(EmitContext& ctx, std::string_view label); -void EmitBranchConditional(EmitContext& ctx, std::string_view condition, - std::string_view true_label, std::string_view false_label); -void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); -void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitBranch(EmitContext& ctx); +void EmitBranchConditional(EmitContext& ctx); +void EmitLoopMerge(EmitContext& ctx); +void EmitSelectionMerge(EmitContext& ctx); void EmitReturn(EmitContext& ctx); void EmitJoin(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); @@ -47,32 +45,22 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, - std::string_view vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex); -void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, - std::string_view vertex); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); +void 
EmitGetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarU32 vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); void EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); -void EmitSetSampleMask(EmitContext& ctx, std::string_view value); -void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); +void EmitSetSampleMask(EmitContext& ctx, ScalarF32 value); +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); @@ -82,13 +70,13 @@ void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); void EmitWorkgroupId(EmitContext& ctx); -void EmitLocalInvocationId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); -void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitLoadLocal(EmitContext& ctx, ScalarU32 word_offset); +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx); void EmitUndefU8(EmitContext& ctx); void EmitUndefU16(EmitContext& ctx); @@ -98,368 +86,321 @@ void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal32(EmitContext& ctx, Register address); +void EmitLoadGlobal64(EmitContext& ctx, Register address); +void EmitLoadGlobal128(EmitContext& ctx, Register address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + ScalarU32 offset); void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + ScalarU32 offset); void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + ScalarU32 offset); void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + 
ScalarU32 offset); void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + ScalarU32 offset); void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); + ScalarU32 offset); void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value); -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); -void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractF16x2(EmitContext& 
ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); + ScalarU32 offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value); +void EmitLoadSharedU8(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedS8(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU16(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedS16(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU32(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU64(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU128(EmitContext& ctx, ScalarU32 offset); +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void 
EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, + Register e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3, + ScalarF32 e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, Register composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, - std::string_view true_value, std::string_view false_value); -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectF16(EmitContext& ctx, 
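
For the composite family above, the constructors now receive raw IR::Value elements (so the emitter can fold immediates instead of routing them through strings), while the extracts take a whole Register plus a compile-time index. A plausible extract lowering (an assumption; the patch only shows signatures) is a single typed MOV whose swizzle is picked from the index, mirroring the "xyzw"[element] trick used for attributes earlier in the patch:

    void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst,
                                   Register composite, u32 index) {
        // Hypothetical: select one component of the vector register.
        ctx.Add("MOV.U {}.x,{}.{};", inst, composite, "xyzw"[index]);
    }
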
std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, - std::string_view true_value, std::string_view false_value); -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); +void EmitSelectU1(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectF32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitPackUint2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, Register value); +void EmitUnpackUint2x32(EmitContext& ctx, Register value); +void EmitPackFloat2x16(EmitContext& ctx, Register value); +void EmitUnpackFloat2x16(EmitContext& ctx, Register value); +void EmitPackHalf2x16(EmitContext& ctx, Register value); +void EmitUnpackHalf2x16(EmitContext& ctx, Register value); +void EmitPackDouble2x32(EmitContext& ctx, Register value); +void EmitUnpackDouble2x32(EmitContext& ctx, Register value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void EmitGetInBoundsFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx, std::string_view value); -void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFPAbs64(EmitContext& ctx, std::string_view value); -void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, 
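
The select declarations above take the condition as ScalarS32, which fits the 0 / -1 boolean mask described for the comparisons: GLASM's CMP chooses per component on the sign of its first source, so a true mask (-1, negative) selects the second operand. One plausible lowering (an assumption, with an IR::Inst& destination added for illustration; these stub declarations do not take one):

    void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond,
                       ScalarS32 true_value, ScalarS32 false_value) {
        // CMP: dst := (cond < 0) ? true_value : false_value
        ctx.Add("CMP.S {}.x,{},{},{};", inst, cond, true_value, false_value);
    }
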
std::string_view a, std::string_view b); -void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPNeg16(EmitContext& ctx, std::string_view value); -void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFPNeg64(EmitContext& ctx, std::string_view value); -void EmitFPSin(EmitContext& ctx, std::string_view value); -void EmitFPCos(EmitContext& ctx, std::string_view value); -void EmitFPExp2(EmitContext& ctx, std::string_view value); -void EmitFPLog2(EmitContext& ctx, std::string_view value); -void EmitFPRecip32(EmitContext& ctx, std::string_view value); -void EmitFPRecip64(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); -void EmitFPSqrt(EmitContext& ctx, std::string_view value); -void EmitFPSaturate16(EmitContext& ctx, std::string_view value); -void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFPSaturate64(EmitContext& ctx, std::string_view value); -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); -void EmitFPFloor16(EmitContext& ctx, std::string_view value); -void EmitFPFloor32(EmitContext& ctx, std::string_view value); -void EmitFPFloor64(EmitContext& ctx, std::string_view value); -void EmitFPCeil16(EmitContext& ctx, std::string_view value); -void EmitFPCeil32(EmitContext& ctx, std::string_view value); -void EmitFPCeil64(EmitContext& ctx, std::string_view value); -void EmitFPTrunc16(EmitContext& ctx, std::string_view value); -void EmitFPTrunc32(EmitContext& ctx, std::string_view value); -void EmitFPTrunc64(EmitContext& ctx, std::string_view value); -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void 
EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPIsNan16(EmitContext& ctx, std::string_view value); -void EmitFPIsNan32(EmitContext& ctx, std::string_view value); -void EmitFPIsNan64(EmitContext& ctx, std::string_view value); -void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view 
b); -void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, - std::string_view insert, std::string_view offset, std::string_view count); -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, - std::string_view offset, std::string_view count); -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, - std::string_view offset, std::string_view count); -void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, - std::string_view max); -void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, - std::string_view max); -void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void 
EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); +void EmitFPAbs16(EmitContext& ctx, Register value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPAbs64(EmitContext& ctx, Register value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPMax32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); +void EmitFPMax64(EmitContext& ctx, Register a, Register b); +void EmitFPMin32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); +void EmitFPMin64(EmitContext& ctx, Register a, Register b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPNeg16(EmitContext& ctx, Register value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); +void EmitFPNeg64(EmitContext& ctx, Register value); +void EmitFPSin(EmitContext& ctx, ScalarF32 value); +void EmitFPCos(EmitContext& ctx, ScalarF32 value); +void EmitFPExp2(EmitContext& ctx, ScalarF32 value); +void EmitFPLog2(EmitContext& ctx, ScalarF32 value); +void EmitFPRecip32(EmitContext& ctx, ScalarF32 value); +void EmitFPRecip64(EmitContext& ctx, Register value); +void EmitFPRecipSqrt32(EmitContext& ctx, ScalarF32 value); +void EmitFPRecipSqrt64(EmitContext& ctx, Register value); +void EmitFPSqrt(EmitContext& ctx, ScalarF32 value); +void EmitFPSaturate16(EmitContext& ctx, Register value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPSaturate64(EmitContext& ctx, Register value); +void 
EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPClamp32(EmitContext& ctx, ScalarF32 value, ScalarF32 min_value, ScalarF32 max_value); +void EmitFPClamp64(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPRoundEven16(EmitContext& ctx, Register value); +void EmitFPRoundEven32(EmitContext& ctx, ScalarF32 value); +void EmitFPRoundEven64(EmitContext& ctx, Register value); +void EmitFPFloor16(EmitContext& ctx, Register value); +void EmitFPFloor32(EmitContext& ctx, ScalarF32 value); +void EmitFPFloor64(EmitContext& ctx, Register value); +void EmitFPCeil16(EmitContext& ctx, Register value); +void EmitFPCeil32(EmitContext& ctx, ScalarF32 value); +void EmitFPCeil64(EmitContext& ctx, Register value); +void EmitFPTrunc16(EmitContext& ctx, Register value); +void EmitFPTrunc32(EmitContext& ctx, ScalarF32 value); +void EmitFPTrunc64(EmitContext& ctx, Register value); +void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Register lhs, Register rhs); +void 
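
The 16/32/64-bit triples above make the operand convention easy to read off: 32-bit scalars become ScalarF32/ScalarS32/ScalarU32 (register or inline immediate), while 16-bit and 64-bit values stay Register, presumably because they occupy packed or paired 32-bit components rather than a single scalar slot. Two declarations from this header illustrate the split:

    void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b);
    void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
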
EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPIsNan16(EmitContext& ctx, Register value); +void EmitFPIsNan32(EmitContext& ctx, ScalarF32 value); +void EmitFPIsNan64(EmitContext& ctx, Register value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitIAdd64(EmitContext& ctx, Register a, Register b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitISub64(EmitContext& ctx, Register a, Register b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitINeg32(EmitContext& ctx, ScalarS32 value); +void EmitINeg64(EmitContext& ctx, Register value); +void EmitIAbs32(EmitContext& ctx, ScalarS32 value); +void EmitIAbs64(EmitContext& ctx, Register value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); +void EmitShiftLeftLogical64(EmitContext& ctx, Register base, Register shift); +void EmitShiftRightLogical32(EmitContext& ctx, ScalarU32 base, ScalarU32 shift); +void EmitShiftRightLogical64(EmitContext& ctx, Register base, Register shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, ScalarS32 base, ScalarS32 shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, Register base, Register shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitBitFieldInsert(EmitContext& ctx, ScalarS32 base, ScalarS32 insert, ScalarS32 offset, + ScalarS32 count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count); +void EmitBitReverse32(EmitContext& ctx, ScalarS32 value); +void EmitBitCount32(EmitContext& ctx, ScalarS32 value); +void EmitBitwiseNot32(EmitContext& ctx, ScalarS32 value); +void EmitFindSMsb32(EmitContext& ctx, ScalarS32 value); +void EmitFindUMsb32(EmitContext& ctx, ScalarU32 value); +void EmitSMin32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); +void EmitUMin32(EmitContext& ctx, ScalarU32 a, ScalarU32 b); +void EmitSMax32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); +void EmitUMax32(EmitContext& ctx, ScalarU32 a, ScalarU32 b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); +void EmitSLessThan(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThan(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); +void EmitIEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitSLessThanEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThanEqual(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); +void EmitSGreaterThan(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThan(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); +void EmitINotEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); +void 
EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicSMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value); +void EmitSharedAtomicUMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicSMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value); +void EmitSharedAtomicUMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicInc32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicDec32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicAnd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicOr32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicXor32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicExchange32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); +void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Register value); void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarS32 value); void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarS32 value); void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarU32 value); void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value); + const IR::Value& offset, ScalarU32 value); void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + 
Register value); void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value); + const IR::Value& offset, Register value); void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + ScalarF32 value); void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + Register value); void EmitGlobalAtomicIAdd32(EmitContext& ctx); void EmitGlobalAtomicSMin32(EmitContext& ctx); void EmitGlobalAtomicUMin32(EmitContext& ctx); @@ -489,58 +430,58 @@ void EmitGlobalAtomicMinF16x2(EmitContext& ctx); void EmitGlobalAtomicMinF32x2(EmitContext& ctx); void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalNot(EmitContext& ctx, std::string_view value); -void EmitConvertS16F16(EmitContext& ctx, std::string_view value); -void EmitConvertS16F32(EmitContext& ctx, std::string_view value); -void EmitConvertS16F64(EmitContext& ctx, std::string_view value); -void EmitConvertS32F16(EmitContext& ctx, std::string_view value); -void EmitConvertS32F32(EmitContext& ctx, std::string_view value); -void EmitConvertS32F64(EmitContext& ctx, std::string_view value); -void EmitConvertS64F16(EmitContext& ctx, std::string_view value); -void EmitConvertS64F32(EmitContext& ctx, std::string_view value); -void EmitConvertS64F64(EmitContext& ctx, std::string_view value); -void EmitConvertU16F16(EmitContext& ctx, std::string_view value); -void EmitConvertU16F32(EmitContext& ctx, std::string_view value); -void EmitConvertU16F64(EmitContext& ctx, std::string_view value); -void EmitConvertU32F16(EmitContext& ctx, std::string_view value); -void EmitConvertU32F32(EmitContext& ctx, std::string_view value); -void EmitConvertU32F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64F16(EmitContext& ctx, std::string_view value); -void EmitConvertU64F32(EmitContext& ctx, std::string_view value); -void EmitConvertU64F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64U32(EmitContext& ctx, std::string_view value); -void EmitConvertU32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF16F32(EmitContext& ctx, 
std::string_view value); -void EmitConvertF32F16(EmitContext& ctx, std::string_view value); -void EmitConvertF32F64(EmitContext& ctx, std::string_view value); -void EmitConvertF64F32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S8(EmitContext& ctx, std::string_view value); -void EmitConvertF16S16(EmitContext& ctx, std::string_view value); -void EmitConvertF16S32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S64(EmitContext& ctx, std::string_view value); -void EmitConvertF16U8(EmitContext& ctx, std::string_view value); -void EmitConvertF16U16(EmitContext& ctx, std::string_view value); -void EmitConvertF16U32(EmitContext& ctx, std::string_view value); -void EmitConvertF16U64(EmitContext& ctx, std::string_view value); -void EmitConvertF32S8(EmitContext& ctx, std::string_view value); -void EmitConvertF32S16(EmitContext& ctx, std::string_view value); -void EmitConvertF32S32(EmitContext& ctx, std::string_view value); -void EmitConvertF32S64(EmitContext& ctx, std::string_view value); -void EmitConvertF32U8(EmitContext& ctx, std::string_view value); -void EmitConvertF32U16(EmitContext& ctx, std::string_view value); -void EmitConvertF32U32(EmitContext& ctx, std::string_view value); -void EmitConvertF32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF64S8(EmitContext& ctx, std::string_view value); -void EmitConvertF64S16(EmitContext& ctx, std::string_view value); -void EmitConvertF64S32(EmitContext& ctx, std::string_view value); -void EmitConvertF64S64(EmitContext& ctx, std::string_view value); -void EmitConvertF64U8(EmitContext& ctx, std::string_view value); -void EmitConvertF64U16(EmitContext& ctx, std::string_view value); -void EmitConvertF64U32(EmitContext& ctx, std::string_view value); -void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitLogicalOr(EmitContext& ctx, ScalarS32 a, ScalarS32 b); +void EmitLogicalAnd(EmitContext& ctx, ScalarS32 a, ScalarS32 b); +void EmitLogicalXor(EmitContext& ctx, ScalarS32 a, ScalarS32 b); +void EmitLogicalNot(EmitContext& ctx, ScalarS32 value); +void EmitConvertS16F16(EmitContext& ctx, Register value); +void EmitConvertS16F32(EmitContext& ctx, Register value); +void EmitConvertS16F64(EmitContext& ctx, Register value); +void EmitConvertS32F16(EmitContext& ctx, Register value); +void EmitConvertS32F32(EmitContext& ctx, Register value); +void EmitConvertS32F64(EmitContext& ctx, Register value); +void EmitConvertS64F16(EmitContext& ctx, Register value); +void EmitConvertS64F32(EmitContext& ctx, Register value); +void EmitConvertS64F64(EmitContext& ctx, Register value); +void EmitConvertU16F16(EmitContext& ctx, Register value); +void EmitConvertU16F32(EmitContext& ctx, Register value); +void EmitConvertU16F64(EmitContext& ctx, Register value); +void EmitConvertU32F16(EmitContext& ctx, Register value); +void EmitConvertU32F32(EmitContext& ctx, Register value); +void EmitConvertU32F64(EmitContext& ctx, Register value); +void EmitConvertU64F16(EmitContext& ctx, Register value); +void EmitConvertU64F32(EmitContext& ctx, Register value); +void EmitConvertU64F64(EmitContext& ctx, Register value); +void EmitConvertU64U32(EmitContext& ctx, Register value); +void EmitConvertU32U64(EmitContext& ctx, Register value); +void EmitConvertF16F32(EmitContext& ctx, Register value); +void EmitConvertF32F16(EmitContext& ctx, Register value); +void EmitConvertF32F64(EmitContext& ctx, Register value); +void EmitConvertF64F32(EmitContext& ctx, Register value); +void EmitConvertF16S8(EmitContext& ctx, Register 
value); +void EmitConvertF16S16(EmitContext& ctx, Register value); +void EmitConvertF16S32(EmitContext& ctx, Register value); +void EmitConvertF16S64(EmitContext& ctx, Register value); +void EmitConvertF16U8(EmitContext& ctx, Register value); +void EmitConvertF16U16(EmitContext& ctx, Register value); +void EmitConvertF16U32(EmitContext& ctx, Register value); +void EmitConvertF16U64(EmitContext& ctx, Register value); +void EmitConvertF32S8(EmitContext& ctx, Register value); +void EmitConvertF32S16(EmitContext& ctx, Register value); +void EmitConvertF32S32(EmitContext& ctx, Register value); +void EmitConvertF32S64(EmitContext& ctx, Register value); +void EmitConvertF32U8(EmitContext& ctx, Register value); +void EmitConvertF32U16(EmitContext& ctx, Register value); +void EmitConvertF32U32(EmitContext& ctx, Register value); +void EmitConvertF32U64(EmitContext& ctx, Register value); +void EmitConvertF64S8(EmitContext& ctx, Register value); +void EmitConvertF64S16(EmitContext& ctx, Register value); +void EmitConvertF64S32(EmitContext& ctx, Register value); +void EmitConvertF64S64(EmitContext& ctx, Register value); +void EmitConvertF64U8(EmitContext& ctx, Register value); +void EmitConvertF64U16(EmitContext& ctx, Register value); +void EmitConvertF64U32(EmitContext& ctx, Register value); +void EmitConvertF64U64(EmitContext& ctx, Register value); void EmitBindlessImageSampleImplicitLod(EmitContext&); void EmitBindlessImageSampleExplicitLod(EmitContext&); void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); @@ -566,36 +507,29 @@ void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset); + Register coords, Register bias_lc, const IR::Value& offset); void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset); + Register coords, Register lod_lc, const IR::Value& offset); void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset); + Register coords, Register dref, Register bias_lc, + const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2); -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms); + Register coords, Register dref, Register lod_lc, + const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + const IR::Value& offset, const IR::Value& offset2, Register dref); +void 
EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register offset, Register lod, Register ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod); -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view color); + Register lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register derivates, Register offset, Register lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); @@ -619,53 +553,49 @@ void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarU32 value); void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarS32 value); void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarU32 value); void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarS32 value); void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarU32 value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value); +void EmitImageAtomicOr32(EmitContext& ctx, 
IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value); + Register coords, ScalarU32 value); void EmitLaneId(EmitContext& ctx); -void EmitVoteAll(EmitContext& ctx, std::string_view pred); -void EmitVoteAny(EmitContext& ctx, std::string_view pred); -void EmitVoteEqual(EmitContext& ctx, std::string_view pred); -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitVoteAll(EmitContext& ctx, ScalarS32 pred); +void EmitVoteAny(EmitContext& ctx, ScalarS32 pred); +void EmitVoteEqual(EmitContext& ctx, ScalarS32 pred); +void EmitSubgroupBallot(EmitContext& ctx, ScalarS32 pred); void EmitSubgroupEqMask(EmitContext& ctx); void EmitSubgroupLtMask(EmitContext& ctx); void EmitSubgroupLeMask(EmitContext& ctx); void EmitSubgroupGtMask(EmitContext& ctx); void EmitSubgroupGeMask(EmitContext& ctx); -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask); -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, - std::string_view swizzle); -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); +void EmitDPdxFine(EmitContext& ctx, ScalarF32 op_a); +void EmitDPdyFine(EmitContext& ctx, ScalarF32 op_a); +void EmitDPdxCoarse(EmitContext& ctx, ScalarF32 op_a); +void EmitDPdyCoarse(EmitContext& ctx, ScalarF32 op_a); } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 579806c38..7b88d6f02 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -2,239 +2,209 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
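The hunks that follow apply the retyped interface mechanically: emitters that previously received pre-rendered std::string_view operands now take the register allocator's typed values and let fmt render them at the call site. As a rough sketch of the idea only — the struct and helper below are hypothetical stand-ins for illustration, not the patch's actual RegAlloc types, which are defined outside this hunk — a typed scalar operand can carry either an allocated register or an inline constant and still print as a single GLASM operand:

    #include <cstdint>
    #include <string>

    #include <fmt/format.h>

    // Hypothetical stand-in for a typed scalar operand: either an allocated
    // register component or an inline immediate.
    struct HypotheticalScalarS32 {
        bool is_register{};
        int reg_index{};     // meaningful when is_register is true
        std::int32_t imm{};  // meaningful otherwise
    };

    // Renders the operand as it would appear in GLASM source text.
    std::string OperandText(const HypotheticalScalarS32& s) {
        return s.is_register ? fmt::format("R{}.x", s.reg_index) : fmt::format("{}", s.imm);
    }

    int main() {
        const HypotheticalScalarS32 a{.is_register = true, .reg_index = 0}; // R0.x
        const HypotheticalScalarS32 b{.imm = 16};                           // inline constant
        // Same shape as EmitIAdd32's ctx.Add("ADD.S {}.x,{},{};", inst, a, b),
        // with the destination register picked by hand for the example:
        fmt::print("ADD.S R2.x,{},{};\n", OperandText(a), OperandText(b));
        // Prints: ADD.S R2.x,R0.x,16;
    }

Note how the retyped EmitIAdd32 below also switches the destination from "ADD.U {}" to "ADD.S {}.x": scalar results are written to an explicit .x component, and the .S type suffix matches the ScalarS32 operands.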
-#include <string_view> - #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { -void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("ADD.U {},{},{};", inst, a, b); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("ADD.S {}.x,{},{};", inst, a, b); } -void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, + [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("SUB.U {},{},{};", inst, a, b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("SUB.S {}.x,{},{};", inst, a, b); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, + [[maybe_unused]] Register b) { throw NotImplementedException("GLASM instruction"); } -void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MUL.S {}.x,{},{};", inst, a, b); +} + +void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); +} + +void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, + [[maybe_unused]] Register shift) { throw NotImplementedException("GLASM instruction"); } -void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 base, + [[maybe_unused]] ScalarU32 shift) {
throw NotImplementedException("GLASM instruction"); } -void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, + [[maybe_unused]] Register shift) { throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 base, + [[maybe_unused]] ScalarS32 shift) { throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, + [[maybe_unused]] Register shift) { throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("AND.S {}.x,{},{};", inst, a, b); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("OR.S {}.x,{},{};", inst, a, b); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("XOR.S {}.x,{},{};", inst, a, b); +} + +void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 base, + [[maybe_unused]] ScalarS32 insert, [[maybe_unused]] ScalarS32 offset, + [[maybe_unused]] ScalarS32 count) { throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] ScalarS32 base, [[maybe_unused]] ScalarS32 offset, + [[maybe_unused]] ScalarS32 count) { throw NotImplementedException("GLASM instruction"); } -void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("AND {},{},{};", inst, a, b); -} - -void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("OR {},{},{};", inst, a, b); -} - -void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.Add("XOR {},{},{};", inst, a, b); -} - -void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, - std::string_view insert, std::string_view offset, std::string_view count) { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); - ctx.Add("BFI.U {},RC,{},{};", inst, insert, base); -} - -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, - std::string_view offset, std::string_view count) { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); - ctx.Add("BFE.S {},RC,{};", inst, base); -} - -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, - 
std::string_view offset, std::string_view count) { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); - ctx.Add("BFE.U {},RC,{};", inst, base); -} - -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - ctx.Add("BFR {},{};", inst, value); -} - -void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] ScalarU32 base, [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] ScalarU32 count) { throw NotImplementedException("GLASM instruction"); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - ctx.Add("NOT {},{};", inst, value); -} - -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 a, + [[maybe_unused]] ScalarS32 b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 a, + [[maybe_unused]] ScalarU32 b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 a, + [[maybe_unused]] ScalarS32 b) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 a, + [[maybe_unused]] ScalarU32 b) { throw NotImplementedException("GLASM instruction"); } void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, - [[maybe_unused]] std::string_view max) { + [[maybe_unused]] ScalarS32 value, [[maybe_unused]] ScalarS32 min, + [[maybe_unused]] 
ScalarS32 max) { throw NotImplementedException("GLASM instruction"); } void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, - [[maybe_unused]] std::string_view max) { + [[maybe_unused]] ScalarU32 value, [[maybe_unused]] ScalarU32 min, + [[maybe_unused]] ScalarU32 max) { throw NotImplementedException("GLASM instruction"); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - ctx.Add("SLT.S {},{},{};", inst, lhs, rhs); +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + [[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - ctx.Add("SLT.U {},{},{};", inst, lhs, rhs); +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, + [[maybe_unused]] ScalarU32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - ctx.Add("SEQ {},{},{};", inst, lhs, rhs); +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + [[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SLE.S {},{},{};", inst, lhs, rhs); +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + [[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SLE.U {},{},{};", inst, lhs, rhs); +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, + [[maybe_unused]] ScalarU32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SGT.S {},{},{};", inst, lhs, rhs); +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + [[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SGT.U {},{},{};", inst, lhs, rhs); +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, + [[maybe_unused]] ScalarU32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - ctx.Add("SNE.U {},{},{};", inst, lhs, rhs); +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + 
[[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SGE.S {},{},{};", inst, lhs, rhs); +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, + [[maybe_unused]] ScalarS32 rhs) { + throw NotImplementedException("GLASM instruction"); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.Add("SGE.U {},{},{};", inst, lhs, rhs); +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, + [[maybe_unused]] ScalarU32 rhs) { + throw NotImplementedException("GLASM instruction"); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 9e38a1bdf..8ef0f7c17 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -11,7 +11,7 @@ namespace Shader::Backend::GLASM { namespace { -void StorageOp(EmitContext& ctx, const IR::Value& binding, std::string_view offset, +void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, std::string_view then_expr, std::string_view else_expr = {}) { // Operate on bindless SSBO, call the expression with bounds checking // address = c[binding].xy @@ -23,20 +23,21 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, std::string_view offs "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length sb_binding, offset, offset, sb_binding); if (else_expr.empty()) { - ctx.Add("{}", then_expr); + ctx.Add("IF NE.x;{}ENDIF;", then_expr); } else { ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); } } -void Store(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value, std::string_view size) { +template <typename ValueType> +void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, + std::string_view size) { StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},LC.x;", size, value)); } -void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, std::string_view offset, +void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { - const std::string ret{ctx.reg_alloc.Define(inst)}; + const Register ret{ctx.reg_alloc.Define(inst)}; StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},LC.x;", size, ret), fmt::format("MOV.U {},{{0,0,0,0}};", ret)); } @@ -58,18 +59,15 @@ void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) { throw NotImplementedException("GLASM instruction"); } -void EmitLoadGlobal32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view address) { +void EmitLoadGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) { throw NotImplementedException("GLASM instruction"); } -void EmitLoadGlobal64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view address) { +void EmitLoadGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) { throw NotImplementedException("GLASM instruction"); } -void EmitLoadGlobal128([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view address) { +void
EmitLoadGlobal128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) { throw NotImplementedException("GLASM instruction"); } @@ -89,89 +87,88 @@ void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) { throw NotImplementedException("GLASM instruction"); } -void EmitWriteGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view address, - [[maybe_unused]] std::string_view value) { +void EmitWriteGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, + [[maybe_unused]] ScalarU32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitWriteGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view address, - [[maybe_unused]] std::string_view value) { +void EmitWriteGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, + [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitWriteGlobal128([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view address, - [[maybe_unused]] std::string_view value) { +void EmitWriteGlobal128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, + [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "U8"); } void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "S8"); } void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "U16"); } void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "S16"); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "U32"); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "U32X2"); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - std::string_view offset) { + ScalarU32 offset) { Load(ctx, inst, binding, offset, "U32X4"); } -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { Store(ctx, binding, offset, value, "U8"); } -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarS32 value) { Store(ctx, binding, offset, value, "S8"); } -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { Store(ctx, binding, offset, value, "U16"); } -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 
offset, + ScalarS32 value) { Store(ctx, binding, offset, value, "S16"); } -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + ScalarU32 value) { Store(ctx, binding, offset, value, "U32"); } -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { Store(ctx, binding, offset, value, "U32X2"); } -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, std::string_view offset, - std::string_view value) { +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, + Register value) { Store(ctx, binding, offset, value, "U32X4"); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 32eb87837..08de3f92f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -25,21 +25,19 @@ void EmitVoid(EmitContext& ctx) { NotImplemented(); } -void EmitBranch(EmitContext& ctx, std::string_view label) { +void EmitBranch(EmitContext& ctx) { NotImplemented(); } -void EmitBranchConditional(EmitContext& ctx, std::string_view condition, - std::string_view true_label, std::string_view false_label) { +void EmitBranchConditional(EmitContext& ctx) { NotImplemented(); } -void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, - std::string_view continue_label) { +void EmitLoopMerge(EmitContext& ctx) { NotImplemented(); } -void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { +void EmitSelectionMerge(EmitContext& ctx) { NotImplemented(); } @@ -55,7 +53,7 @@ void EmitUnreachable(EmitContext& ctx) { NotImplemented(); } -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { +void EmitDemoteToHelperInvocation(EmitContext& ctx) { NotImplemented(); } @@ -155,8 +153,8 @@ void EmitWorkgroupId(EmitContext& ctx) { NotImplemented(); } -void EmitLocalInvocationId(EmitContext& ctx) { - NotImplemented(); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.localid;", inst); } void EmitInvocationId(EmitContext& ctx) { @@ -175,11 +173,11 @@ void EmitYDirection(EmitContext& ctx) { NotImplemented(); } -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { +void EmitLoadLocal(EmitContext& ctx, ScalarU32 word_offset) { NotImplemented(); } -void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { NotImplemented(); } @@ -203,245 +201,127 @@ void EmitUndefU64(EmitContext& ctx) { NotImplemented(); } -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedU8(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedS8(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedU16(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedS16(EmitContext& ctx, 
ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedU32(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedU64(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { +void EmitLoadSharedU128(EmitContext& ctx, ScalarU32 offset) { NotImplemented(); } -void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { NotImplemented(); } -void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { NotImplemented(); } -void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { NotImplemented(); } -void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { NotImplemented(); } -void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { NotImplemented(); } -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { +void EmitSelectU1(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { NotImplemented(); } -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { +void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { NotImplemented(); } -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { +void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { NotImplemented(); } -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitSelectU32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { NotImplemented(); } -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitSelectU64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { NotImplemented(); } -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { NotImplemented(); } -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitSelectF32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { NotImplemented(); } -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { NotImplemented(); } -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitSelectF16(EmitContext& ctx, Register cond, Register true_value, Register false_value) { NotImplemented(); } -void 
EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { +void EmitSelectF32(EmitContext& ctx, Register cond, Register true_value, Register false_value) { NotImplemented(); } -void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { +void EmitSelectF64(EmitContext& ctx, Register cond, Register true_value, Register false_value) { NotImplemented(); } -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { +void EmitPackUint2x32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitUnpackUint2x32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitPackFloat2x16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitUnpackFloat2x16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitPackHalf2x16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitUnpackHalf2x16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { +void EmitPackDouble2x32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { - NotImplemented(); -} - -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { - NotImplemented(); -} - -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { - NotImplemented(); -} - -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeConstructF64x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeConstructF64x3(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeConstructF64x4(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x3(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x4(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 
index) { - NotImplemented(); -} - -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { +void EmitUnpackDouble2x32(EmitContext& ctx, Register value) { NotImplemented(); } @@ -469,210 +349,198 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan32(EmitContext& ctx, ScalarF32 value) { NotImplemented(); } -void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicSMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value) { NotImplemented(); } -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicUMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicSMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value) { NotImplemented(); } -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicUMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicInc32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicDec32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicAnd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicOr32(EmitContext& ctx, ScalarU32 
pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicXor32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicExchange32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { NotImplemented(); } -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Register value) { NotImplemented(); } void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarS32 value) { NotImplemented(); } void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarS32 value) { NotImplemented(); } void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { + const IR::Value& offset, ScalarU32 value) { NotImplemented(); } void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register 
value) { NotImplemented(); } void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { + const IR::Value& offset, Register value) { NotImplemented(); } void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + ScalarF32 value) { NotImplemented(); } void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + Register value) { NotImplemented(); } @@ -792,211 +660,211 @@ void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { NotImplemented(); } -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalOr(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { NotImplemented(); } -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalAnd(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { NotImplemented(); } -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalXor(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { NotImplemented(); } -void EmitLogicalNot(EmitContext& ctx, std::string_view value) { +void EmitLogicalNot(EmitContext& ctx, ScalarS32 value) { NotImplemented(); } -void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS64F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS64F32(EmitContext& ctx, 
std::string_view value) { +void EmitConvertS64F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS64F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { +void EmitConvertU64U32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { +void EmitConvertU32U64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32F16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32F64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64F32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF16U64(EmitContext& ctx, 
std::string_view value) { +void EmitConvertF16U64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S64(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U8(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U16(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U64(EmitContext& ctx, Register value) { NotImplemented(); } @@ -1097,69 +965,62 @@ void EmitBoundImageWrite(EmitContext&) { } void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset) { + Register coords, Register bias_lc, const IR::Value& offset) { NotImplemented(); } void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset) { + Register coords, Register lod_lc, const IR::Value& offset) { NotImplemented(); } void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset) { + Register coords, Register dref, Register bias_lc, + const IR::Value& offset) { NotImplemented(); } void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset) { + Register 
coords, Register dref, Register lod_lc, + const IR::Value& offset) { NotImplemented(); } -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + const IR::Value& offset, const IR::Value& offset2) { NotImplemented(); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref) { +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + const IR::Value& offset, const IR::Value& offset2, Register dref) { NotImplemented(); } -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms) { +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register offset, Register lod, Register ms) { NotImplemented(); } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { + Register lod) { NotImplemented(); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords) { NotImplemented(); } -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp) { +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register derivates, Register offset, Register lod_clamp) { NotImplemented(); } -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords) { NotImplemented(); } -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + Register color) { NotImplemented(); } @@ -1252,57 +1113,57 @@ void EmitBoundImageAtomicExchange32(EmitContext&) { } void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarU32 value) { NotImplemented(); } void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarS32 value) { NotImplemented(); } void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarU32 value) { NotImplemented(); } void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarS32 value) { NotImplemented(); } void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarU32 value) { NotImplemented(); } -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view 
coords, std::string_view value) { +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value) { NotImplemented(); } -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value) { NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value) { NotImplemented(); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value) { NotImplemented(); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, + ScalarU32 value) { NotImplemented(); } void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + Register coords, ScalarU32 value) { NotImplemented(); } @@ -1310,19 +1171,19 @@ void EmitLaneId(EmitContext& ctx) { NotImplemented(); } -void EmitVoteAll(EmitContext& ctx, std::string_view pred) { +void EmitVoteAll(EmitContext& ctx, ScalarS32 pred) { NotImplemented(); } -void EmitVoteAny(EmitContext& ctx, std::string_view pred) { +void EmitVoteAny(EmitContext& ctx, ScalarS32 pred) { NotImplemented(); } -void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { +void EmitVoteEqual(EmitContext& ctx, ScalarS32 pred) { NotImplemented(); } -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { +void EmitSubgroupBallot(EmitContext& ctx, ScalarS32 pred) { NotImplemented(); } @@ -1346,47 +1207,43 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask) { NotImplemented(); } -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask) { +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask) { NotImplemented(); } -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask) { NotImplemented(); } -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + ScalarU32 clamp, ScalarU32 segmentation_mask) { NotImplemented(); } 
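Every stub above now receives typed operands (Register, ScalarU32, ScalarF32, and so on) instead of std::string_view, so filling one in later reduces to a single formatted emission against the register allocator. A rough sketch of the pattern, modeled on EmitSelectU32 from the emit_glasm_select.cpp file deleted just below; EmitExampleSelect is a hypothetical name used only for illustration:
void EmitExampleSelect(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarU32 true_value,
                       ScalarU32 false_value) {
    // CMP.S d,a,b,c writes b where a < 0 and c otherwise; booleans are consumed as
    // signed -1/0, and each {} placeholder prints either a register name or an
    // immediate through the fmt::formatter specializations defined in reg_alloc.h.
    ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value);
}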
-void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, - std::string_view swizzle) { +void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle) { NotImplemented(); } -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { +void EmitDPdxFine(EmitContext& ctx, ScalarF32 op_a) { NotImplemented(); } -void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { +void EmitDPdyFine(EmitContext& ctx, ScalarF32 op_a) { NotImplemented(); } -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { +void EmitDPdxCoarse(EmitContext& ctx, ScalarF32 op_a) { NotImplemented(); } -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { +void EmitDPdyCoarse(EmitContext& ctx, ScalarF32 op_a) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index 16f6c33f3..e69de29bb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -1,46 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string_view> - -#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLASM { - -void EmitSelectU1(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSelectU8(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSelectU16(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, - std::string_view true_value, std::string_view false_value) { - ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); -} - -void EmitSelectU64(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSelectF16(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, - std::string_view true_value, std::string_view false_value) { - ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); -} - -void EmitSelectF64(EmitContext&, std::string_view, std::string_view, std::string_view) { - throw NotImplementedException("GLASM instruction"); -} -} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index e198dd522..030b48d83 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -12,53 +12,61 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { -namespace { -std::string Representation(Id id) { - if (id.is_condition_code != 0) { - throw NotImplementedException("Condition code"); - } - if (id.is_spill != 0) { - throw NotImplementedException("Spilling"); - } - const u32 index{static_cast<u32>(id.index)}; - return fmt::format("R{}.x", index); + +Register RegAlloc::Define(IR::Inst& inst) { + const Id id{Alloc()};
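+    // The freshly allocated id is recorded on the instruction itself, so later Consume() calls can recover the same register through the instruction's definition and release it once its last use is consumed.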
+    inst.SetDefinition(id); + Register ret; + ret.type = Type::Register; + ret.id = id; + return ret; } -std::string ImmValue(const IR::Value& value) { +Value RegAlloc::Consume(const IR::Value& value) { + if (!value.IsImmediate()) { + return Consume(*value.InstRecursive()); + } + Value ret; switch (value.Type()) { case IR::Type::U1: - return value.U1() ? "-1" : "0"; + ret.type = Type::U32; + ret.imm_u32 = value.U1() ? 0xffffffff : 0; + break; case IR::Type::U32: - return fmt::format("{}", value.U32()); + ret.type = Type::U32; + ret.imm_u32 = value.U32(); + break; case IR::Type::F32: - return fmt::format("{}", value.F32()); + ret.type = Type::F32; + ret.imm_f32 = value.F32(); + break; default: throw NotImplementedException("Immediate type {}", value.Type()); } -} -} // Anonymous namespace - -std::string RegAlloc::Define(IR::Inst& inst) { - const Id id{Alloc()}; - inst.SetDefinition(id); - return Representation(id); + return ret; } -std::string RegAlloc::Consume(const IR::Value& value) { - if (value.IsImmediate()) { - return ImmValue(value); - } else { - return Consume(*value.InstRecursive()); - } +Register RegAlloc::AllocReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(); + return ret; } -std::string RegAlloc::Consume(IR::Inst& inst) { +void RegAlloc::FreeReg(Register reg) { + Free(reg.id); +} + +Value RegAlloc::Consume(IR::Inst& inst) { const Id id{inst.Definition<Id>()}; inst.DestructiveRemoveUsage(); if (!inst.HasUses()) { Free(id); } - return Representation(inst.Definition<Id>()); + Value ret; + ret.type = Type::Register; + ret.id = id; + return ret; } Id RegAlloc::Alloc() { diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index f73aa3348..ef0b6697f 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -6,8 +6,12 @@ #include <bitset> +#include <fmt/format.h> + +#include "common/bit_cast.h" #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/exception.h" namespace Shader::IR { class Inst; @@ -18,6 +22,13 @@ namespace Shader::Backend::GLASM { class EmitContext; +enum class Type : u32 { + Register, + U32, + S32, + F32, +}; + struct Id { union { u32 raw; BitField<30, 1, u32> is_spill; BitField<31, 1, u32> is_condition_code; }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } }; +static_assert(sizeof(Id) == sizeof(u32)); + +struct Value { + Type type; + union { + Id id; + u32 imm_u32; + s32 imm_s32; + f32 imm_f32; + }; + + bool operator==(const Value& rhs) const noexcept { + if (type != rhs.type) { + return false; + } + switch (type) { + case Type::Register: + return id == rhs.id; + case Type::U32: + return imm_u32 == rhs.imm_u32; + case Type::S32: + return imm_s32 == rhs.imm_s32; + case Type::F32: + return Common::BitCast<u32>(imm_f32) == Common::BitCast<u32>(rhs.imm_f32); + } + return false; + } + bool operator!=(const Value& rhs) const noexcept { + return !operator==(rhs); + } +}; +struct Register : Value {}; +struct ScalarRegister : Value {}; +struct ScalarU32 : Value {}; +struct ScalarS32 : Value {}; +struct ScalarF32 : Value {}; class RegAlloc { public: RegAlloc(EmitContext& ctx_) : ctx{ctx_} {} - std::string Define(IR::Inst& inst); + Register Define(IR::Inst& inst); - std::string Consume(const IR::Value& value); + Value Consume(const IR::Value& value); + + Register AllocReg(); + + void FreeReg(Register reg);
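+    // AllocReg/FreeReg hand out short-lived scratch registers that are not tied to an IR instruction; registers created through Define are instead released automatically by Consume when the defining instruction runs out of uses.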
[[nodiscard]] size_t NumUsedRegisters() const noexcept { return num_used_registers; @@ -43,16 +101,132 @@ private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; - EmitContext& ctx; - - std::string Consume(IR::Inst& inst); + Value Consume(IR::Inst& inst); Id Alloc(); void Free(Id id); + EmitContext& ctx; size_t num_used_registers{}; std::bitset<NUM_REGS> register_use{}; }; +template <bool scalar, typename FormatContext> +auto FormatTo(FormatContext& ctx, Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code emission"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spill emission"); + } + if constexpr (scalar) { + return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); + } else { + return fmt::format_to(ctx.out(), "R{}", id.index.Value()); + } +} + } // namespace Shader::Backend::GLASM + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Id> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { + return FormatTo<true>(ctx, id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::Register> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return FormatTo<false>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { + if (value.type != Shader::Backend::GLASM::Type::Register) { + throw Shader::InvalidArgument("Register value type is not register"); + } + return FormatTo<true>(ctx, value.id); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Register: + return FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", value.imm_u32); + case Shader::Backend::GLASM::Type::S32: + return fmt::format_to(ctx.out(), "{}", static_cast<u32>(value.imm_s32)); + case Shader::Backend::GLASM::Type::F32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<u32>(value.imm_f32)); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Register: + return FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::S32: + return fmt::format_to(ctx.out(), "{}", value.imm_s32); + case Shader::Backend::GLASM::Type::F32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32)); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; + +template <> +struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { + switch (value.type) { + case Shader::Backend::GLASM::Type::Register: + return FormatTo<true>(ctx, value.id); + case Shader::Backend::GLASM::Type::U32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); + case Shader::Backend::GLASM::Type::S32: + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_s32)); + case Shader::Backend::GLASM::Type::F32: + return fmt::format_to(ctx.out(), "{}", value.imm_f32); + } + throw Shader::InvalidArgument("Invalid value type {}", value.type); + } +}; From 01e18581b9bdba021e6a4ae8fa0f0987ceea3e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 03:09:55 -0300 Subject: [PATCH 347/770] vk_pipeline_cache: Enable int8 and int16 types on Vulkan --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5edcd072..7830c0194 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -128,6 +128,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == From 939dab71202aa28b907e14876a3811dca0ed2348 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 04:01:17 -0300 Subject: [PATCH 348/770] glasm: Implement more GLASM composite instructions --- .../backend/glasm/emit_glasm_composite.cpp | 92 ++++++++++--------- .../backend/glasm/emit_glasm_instructions.h | 25 +++-- 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index 063dcaf13..94dc5019d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -8,60 +8,71 @@ namespace Shader::Backend::GLASM { namespace { -template <typename... Values> -void CompositeConstructU32(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { +template <auto read_imm, char type, typename... Values> +void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { const Register ret{ctx.reg_alloc.Define(inst)}; if (std::ranges::any_of(std::array{elements...}, [](const IR::Value& value) { return value.IsImmediate(); })) { - const std::array<u32, 4> values{(elements.IsImmediate() ? elements.U32() : 0)...}; - ctx.Add("MOV.U {},{{{},{},{},{}}};", ret, fmt::to_string(values[0]), + using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; + const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; + ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); } size_t index{}; for (const IR::Value& element : {elements...}) { if (!element.IsImmediate()) { const ScalarU32 value{ctx.reg_alloc.Consume(element)}; - ctx.Add("MOV.U {}.{},{};", ret, "xyzw"[index], value); + ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); } ++index; } } -void CompositeExtractU32(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { +void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { const Register ret{ctx.reg_alloc.Define(inst)}; if (ret == composite && index == 0) { // No need to do anything here, the source and destination are the same register return; } - ctx.Add("MOV.U {}.x,{}.{};", ret, composite, "xyzw"[index]); + ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); +} + +template <typename ObjectType> +void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, + u32 index, char type) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ret != composite) { + ctx.Add("MOV.{} {},{};", type, ret, composite); + } + ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], object); } } // Anonymous namespace void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, const IR::Value& e2) { - CompositeConstructU32(ctx, inst, e1, e2); + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); } void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, const IR::Value& e2, const IR::Value& e3) { - CompositeConstructU32(ctx, inst, e1, e2, e3); + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); } void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { - CompositeConstructU32(ctx, inst, e1, e2, e3, e4); + CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); } void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { - CompositeExtractU32(ctx, inst, composite, index); + CompositeExtract(ctx, inst, composite, index, 'U'); } void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { - CompositeExtractU32(ctx, inst, composite, index); + CompositeExtract(ctx, inst, composite, index, 'U'); } void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { - CompositeExtractU32(ctx, inst, composite, index); + CompositeExtract(ctx, inst, composite, index, 'U'); } void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, @@ -131,53 +142,46 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLASM instruction"); } -void EmitCompositeConstructF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, - [[maybe_unused]] ScalarF32 e2) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); } -void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, - [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeConstructF32x3(EmitContext& ctx,
IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); } -void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 e1, - [[maybe_unused]] ScalarF32 e2, [[maybe_unused]] ScalarF32 e3, - [[maybe_unused]] ScalarF32 e4) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { + CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); } -void EmitCompositeExtractF32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); } -void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); } -void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { + CompositeExtract(ctx, inst, composite, index, 'F'); } -void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, - [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); } -void EmitCompositeInsertF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, - [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); } -void EmitCompositeInsertF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] Register composite, - [[maybe_unused]] ScalarF32 object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLASM instruction"); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index) { + CompositeInsert(ctx, inst, composite, object, index, 'F'); } void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 6db76bf46..42de6716a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -158,16 +158,21 @@ void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, 
u32 index); void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2); -void EmitCompositeConstructF32x3(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3); -void EmitCompositeConstructF32x4(EmitContext& ctx, ScalarF32 e1, ScalarF32 e2, ScalarF32 e3, - ScalarF32 e4); -void EmitCompositeExtractF32x2(EmitContext& ctx, Register composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, Register composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, Register composite, u32 index); -void EmitCompositeInsertF32x2(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); -void EmitCompositeInsertF32x3(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); -void EmitCompositeInsertF32x4(EmitContext& ctx, Register composite, ScalarF32 object, u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, + const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, + ScalarF32 object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); From 6358b0d0c1968e6491d39d36f08055f834e0030f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 04:08:19 -0300 Subject: [PATCH 349/770] glasm: Specify namespace when using FormatTo --- src/shader_recompiler/backend/glasm/reg_alloc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index ef0b6697f..6a238afa9 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -136,7 +136,7 @@ struct fmt::formatter<Shader::Backend::GLASM::Id> { } template <typename FormatContext> auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { - return FormatTo<true>(ctx, id); + return Shader::Backend::GLASM::FormatTo<true>(ctx, id); } }; @@ -150,7 +150,7 @@ struct fmt::formatter<Shader::Backend::GLASM::Register> { if (value.type != Shader::Backend::GLASM::Type::Register) { throw Shader::InvalidArgument("Register value type is not register"); } - return FormatTo<false>(ctx, value.id); + return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); } }; @@ -164,7 +164,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { if (value.type != Shader::Backend::GLASM::Type::Register) { throw Shader::InvalidArgument("Register value type is not register"); } - return FormatTo<true>(ctx, value.id); + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); } }; @@ -177,7 +177,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { switch (value.type) { case Shader::Backend::GLASM::Type::Register: - return FormatTo<true>(ctx, value.id); + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); case Shader::Backend::GLASM::Type::U32: return fmt::format_to(ctx.out(), "{}", value.imm_u32); case Shader::Backend::GLASM::Type::S32: @@ -198,7 +198,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { switch (value.type) { case Shader::Backend::GLASM::Type::Register: - return FormatTo<true>(ctx, value.id); + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); case Shader::Backend::GLASM::Type::U32: return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); case Shader::Backend::GLASM::Type::S32: @@ -219,7 +219,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { switch (value.type) { case Shader::Backend::GLASM::Type::Register: - return FormatTo<true>(ctx, value.id); + return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); case Shader::Backend::GLASM::Type::U32: return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); case Shader::Backend::GLASM::Type::S32: From 4de65fbff410d0d42fa34f8ccd3cf6a1ce136797 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 04:09:17 -0300 Subject: [PATCH 350/770] glasm: Remove unused functions left from rebase --- .../backend/glasm/emit_glasm_not_implemented.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 08de3f92f..5729b27a7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -281,18 +281,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist NotImplemented(); } -void EmitSelectF16(EmitContext& ctx, Register cond, Register true_value, Register false_value) { - NotImplemented(); -} - -void EmitSelectF32(EmitContext& ctx, Register cond, Register true_value, Register false_value) { - NotImplemented(); -} - -void EmitSelectF64(EmitContext& ctx, Register cond, Register true_value, Register false_value) { - NotImplemented(); -} - From 9f851e3832fb85c20f406eacfadd12a8bb7d982d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 04:18:37 -0300 Subject: [PATCH 351/770] glasm: Implement GLASM fp16 packing and move bitwise insns --- .../backend/glasm/emit_glasm.cpp | 40 ---------- .../glasm/emit_glasm_bitwise_conversion.cpp | 75 +++++++++++++++++++ .../backend/glasm/emit_glasm_instructions.h | 4 +- .../glasm/emit_glasm_not_implemented.cpp | 24 ------ 4 files changed, 77 insertions(+), 66 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 8981cf300..842ec157d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -139,18 +139,6 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { } throw LogicError("Invalid opcode {}", inst->GetOpcode()); } - -void Alias(IR::Inst& inst, const IR::Value& value) { - if (value.IsImmediate()) { - return; - } - IR::Inst* const value_inst{value.InstRecursive()}; - if (inst.GetOpcode() == IR::Opcode::Identity) { -
value_inst->DestructiveAddUsage(inst.UseCount()); - value_inst->DestructiveRemoveUsage(); - } - inst.SetDefinition(value_inst->Definition<Id>()); -} } // Anonymous namespace std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { @@ -183,32 +171,4 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { return ctx.code; } -void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - -void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); -} - } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index e69de29bb..918d82375 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -0,0 +1,75 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +static void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst* const value_inst{value.InstRecursive()}; + if (inst.GetOpcode() == IR::Opcode::Identity) { + value_inst->DestructiveAddUsage(inst.UseCount()); + value_inst->DestructiveRemoveUsage(); + } + inst.SetDefinition(value_inst->Definition<Id>()); +} + +void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + +void EmitPackUint2x32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUnpackUint2x32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM
instruction"); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("PK2H {}.x,{};", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP2H {}.xy,{}.x;", inst, value); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 42de6716a..cb1067dc9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -200,8 +200,8 @@ void EmitPackUint2x32(EmitContext& ctx, Register value); void EmitUnpackUint2x32(EmitContext& ctx, Register value); void EmitPackFloat2x16(EmitContext& ctx, Register value); void EmitUnpackFloat2x16(EmitContext& ctx, Register value); -void EmitPackHalf2x16(EmitContext& ctx, Register value); -void EmitUnpackHalf2x16(EmitContext& ctx, Register value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); void EmitPackDouble2x32(EmitContext& ctx, Register value); void EmitUnpackDouble2x32(EmitContext& ctx, Register value); void EmitGetZeroFromOp(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 5729b27a7..03464524e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -281,30 +281,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist NotImplemented(); } -void EmitPackUint2x32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitUnpackUint2x32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitPackFloat2x16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitUnpackFloat2x16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitPackHalf2x16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitUnpackHalf2x16(EmitContext& ctx, Register value) { - NotImplemented(); -} - void EmitPackDouble2x32(EmitContext& ctx, Register value) { NotImplemented(); } From 4502595bc2518eecf934110e9393b11bf0c2f75a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 18:03:01 -0300 Subject: [PATCH 352/770] glasm: Initial GLASM fp64 support --- .../backend/glasm/emit_context.h | 7 ++ .../backend/glasm/emit_glasm.cpp | 17 +++-- .../glasm/emit_glasm_bitwise_conversion.cpp | 8 +++ .../glasm/emit_glasm_floating_point.cpp | 16 ++--- .../backend/glasm/emit_glasm_instructions.h | 12 ++-- .../backend/glasm/emit_glasm_memory.cpp | 10 +-- .../glasm/emit_glasm_not_implemented.cpp | 8 --- .../backend/glasm/reg_alloc.cpp | 66 +++++++++++++------ .../backend/glasm/reg_alloc.h | 63 ++++++++++++++++-- 9 files changed, 152 insertions(+), 55 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index a59acbf6c..37663c1c8 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -29,6 +29,13 @@ public: code += '\n'; } + template + void LongAdd(const char* format_str, IR::Inst& inst, Args&&... 
args) { + code += fmt::format(format_str, reg_alloc.LongDefine(inst), std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + template void Add(const char* format_str, Args&&... args) { code += fmt::format(format_str, std::forward(args)...); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 842ec157d..9db6eb4a0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -42,7 +42,11 @@ template struct RegWrapper { RegWrapper(EmitContext& ctx, Value value) : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { - reg = allocated ? reg_alloc.AllocReg() : Register{value}; + if (allocated) { + reg = value.type == Type::F64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + } else { + reg = Register{value}; + } switch (value.type) { case Type::Register: break; @@ -55,6 +59,9 @@ struct RegWrapper { case Type::F32: ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); break; + case Type::F64: + ctx.Add("MOV.F64 {}.x,{};", reg, value.imm_f64); + break; } } ~RegWrapper() { @@ -162,10 +169,12 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { header += fmt::format("R{},", index); } - header += "RC;"; - if (!program.info.storage_buffers_descriptors.empty()) { - header += "LONG TEMP LC;"; + header += "RC;" + "LONG TEMP "; + for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { + header += fmt::format("D{},", index); } + header += "DC;"; ctx.code.insert(0, header); ctx.code += "END"; return ctx.code; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 918d82375..eb6140954 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -72,4 +72,12 @@ void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { ctx.Add("UP2H {}.xy,{}.x;", inst, value); } +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64 {}.x,{};", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64 {}.xy,{}.x;", inst, value); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index fed6503c6..2b9a210aa 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -10,7 +10,8 @@ namespace Shader::Backend::GLASM { -void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } @@ -18,8 +19,8 @@ void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { ctx.Add("MOV.F {}.x,|{}|;", inst, value); } -void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); } void EmitFPAdd16([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -31,9 +32,8 @@ void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { ctx.Add("ADD.F {}.x,{},{};", inst, a, b); } -void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] Register a, [[maybe_unused]] Register b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("ADD.F64 {}.x,{},{};", inst, a, b); } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -94,8 +94,8 @@ void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { ctx.Add("MOV.F {}.x,-{};", inst, value); } -void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); } void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index cb1067dc9..ab1e08215 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -202,20 +202,20 @@ void EmitPackFloat2x16(EmitContext& ctx, Register value); void EmitUnpackFloat2x16(EmitContext& ctx, Register value); void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); -void EmitPackDouble2x32(EmitContext& ctx, Register value); -void EmitUnpackDouble2x32(EmitContext& ctx, Register value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void EmitGetInBoundsFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx, Register value); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); -void EmitFPAbs64(EmitContext& ctx, Register value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); -void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); @@ -228,7 +228,7 @@ void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitFPNeg16(EmitContext& ctx, Register value); void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); -void EmitFPNeg64(EmitContext& ctx, Register value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); void 
EmitFPSin(EmitContext& ctx, ScalarF32 value);
 void EmitFPCos(EmitContext& ctx, ScalarF32 value);
 void EmitFPExp2(EmitContext& ctx, ScalarF32 value);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 8ef0f7c17..0c6a6e1c8 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -17,9 +17,9 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
     // address = c[binding].xy
     // length = c[binding].z
     const u32 sb_binding{binding.U32()};
-    ctx.Add("PK64.U LC,c[{}];"           // pointer = address
-            "CVT.U64.U32 LC.z,{};"       // offset = uint64_t(offset)
-            "ADD.U64 LC.x,LC.x,LC.z;"    // pointer += offset
+    ctx.Add("PK64.U DC,c[{}];"           // pointer = address
+            "CVT.U64.U32 DC.z,{};"       // offset = uint64_t(offset)
+            "ADD.U64 DC.x,DC.x,DC.z;"    // pointer += offset
             "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
             sb_binding, offset, offset, sb_binding);
     if (else_expr.empty()) {
@@ -32,13 +32,13 @@ template <typename ValueType>
 void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
            std::string_view size) {
-    StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},LC.x;", size, value));
+    StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
 }
 
 void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
           std::string_view size) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},LC.x;", size, ret),
+    StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
               fmt::format("MOV.U {},{{0,0,0,0}};", ret));
 }
 } // Anonymous namespace
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index 03464524e..f3baf33af 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -281,14 +281,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist
     NotImplemented();
 }
 
-void EmitPackDouble2x32(EmitContext& ctx, Register value) {
-    NotImplemented();
-}
-
-void EmitUnpackDouble2x32(EmitContext& ctx, Register value) {
-    NotImplemented();
-}
-
 void EmitGetZeroFromOp(EmitContext& ctx) {
     NotImplemented();
 }
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
index 030b48d83..82b627500 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -14,12 +14,11 @@
 namespace Shader::Backend::GLASM {
 
 Register RegAlloc::Define(IR::Inst& inst) {
-    const Id id{Alloc()};
-    inst.SetDefinition(id);
-    Register ret;
-    ret.type = Type::Register;
-    ret.id = id;
-    return ret;
+    return Define(inst, false);
+}
+
+Register RegAlloc::LongDefine(IR::Inst& inst) {
+    return Define(inst, true);
 }
 
 Value RegAlloc::Consume(const IR::Value& value) {
@@ -40,6 +39,10 @@ Value RegAlloc::Consume(const IR::Value& value) {
         ret.type = Type::F32;
         ret.imm_f32 = value.F32();
         break;
+    case IR::Type::F64:
+        ret.type = Type::F64;
+        ret.imm_f64 = value.F64();
+        break;
     default:
         throw NotImplementedException("Immediate type {}", value.Type());
     }
@@ -49,7 +52,14 @@ Value RegAlloc::Consume(const IR::Value& value) {
 
 Register RegAlloc::AllocReg() {
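    // Scratch registers returned by AllocReg/AllocLongReg are not bound to an
    // IR instruction and must be released manually by the caller, e.g.:
    //
    //     const Register tmp{ctx.reg_alloc.AllocReg()};
    //     ctx.Add("MOV.F {}.x,{};", tmp, value);
    //     ctx.reg_alloc.FreeReg(tmp);
    //
    // Define/LongDefine are the instruction-bound variants; their registers
    // are freed by Consume once the defining instruction has no uses left.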
    Register ret;
     ret.type = Type::Register;
-    ret.id = Alloc();
+    ret.id = Alloc(false);
+    return ret;
+}
+
+Register RegAlloc::AllocLongReg() {
+    Register ret;
+    ret.type = Type::Register;
+    ret.id = Alloc(true);
     return ret;
 }
 
@@ -57,6 +67,15 @@ void RegAlloc::FreeReg(Register reg) {
     Free(reg.id);
 }
 
+Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
+    const Id id{Alloc(is_long)};
+    inst.SetDefinition(id);
+    Register ret;
+    ret.type = Type::Register;
+    ret.id = id;
+    return ret;
+}
+
 Value RegAlloc::Consume(IR::Inst& inst) {
     const Id id{inst.Definition()};
     inst.DestructiveRemoveUsage();
@@ -69,18 +88,23 @@ Value RegAlloc::Consume(IR::Inst& inst) {
     return ret;
 }
 
-Id RegAlloc::Alloc() {
-    for (size_t reg = 0; reg < NUM_REGS; ++reg) {
-        if (register_use[reg]) {
-            continue;
+Id RegAlloc::Alloc(bool is_long) {
+    size_t& num_regs{is_long ? num_used_long_registers : num_used_registers};
+    std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use};
+    if (num_used_registers + num_used_long_registers < NUM_REGS) {
+        for (size_t reg = 0; reg < NUM_REGS; ++reg) {
+            if (use[reg]) {
+                continue;
+            }
+            num_regs = std::max(num_regs, reg + 1);
+            use[reg] = true;
+            Id ret{};
+            ret.index.Assign(static_cast<u32>(reg));
+            ret.is_long.Assign(is_long ? 1 : 0);
+            ret.is_spill.Assign(0);
+            ret.is_condition_code.Assign(0);
+            return ret;
         }
-        num_used_registers = std::max(num_used_registers, reg + 1);
-        register_use[reg] = true;
-        Id ret{};
-        ret.index.Assign(static_cast<u32>(reg));
-        ret.is_spill.Assign(0);
-        ret.is_condition_code.Assign(0);
-        return ret;
     }
     throw NotImplementedException("Register spilling");
 }
@@ -89,7 +113,11 @@ void RegAlloc::Free(Id id) {
     if (id.is_spill != 0) {
         throw NotImplementedException("Free spill");
     }
-    register_use[id.index] = false;
+    if (id.is_long != 0) {
+        long_register_use[id.index] = false;
+    } else {
+        register_use[id.index] = false;
+    }
 }
 
 } // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
index 6a238afa9..f1899eae1 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.h
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -27,12 +27,14 @@ enum class Type : u32 {
     U32,
     S32,
     F32,
+    F64,
 };
 
 struct Id {
     union {
         u32 raw;
-        BitField<0, 30, u32> index;
+        BitField<0, 29, u32> index;
+        BitField<29, 1, u32> is_long;
         BitField<30, 1, u32> is_spill;
         BitField<31, 1, u32> is_condition_code;
     };
@@ -53,6 +55,7 @@ struct Value {
         u32 imm_u32;
         s32 imm_s32;
         f32 imm_f32;
+        f64 imm_f64;
     };
 
     bool operator==(const Value& rhs) const noexcept {
@@ -68,6 +71,8 @@ struct Value {
             return imm_s32 == rhs.imm_s32;
         case Type::F32:
             return Common::BitCast<u32>(imm_f32) == Common::BitCast<u32>(rhs.imm_f32);
+        case Type::F64:
+            return Common::BitCast<u64>(imm_f64) == Common::BitCast<u64>(rhs.imm_f64);
         }
         return false;
     }
@@ -80,6 +85,7 @@ struct ScalarRegister : Value {};
 struct ScalarU32 : Value {};
 struct ScalarS32 : Value {};
 struct ScalarF32 : Value {};
+struct ScalarF64 : Value {};
 
 class RegAlloc {
 public:
@@ -87,9 +93,13 @@ public:
 
     Register Define(IR::Inst& inst);
 
+    Register LongDefine(IR::Inst& inst);
+
     Value Consume(const IR::Value& value);
 
-    Register AllocReg();
+    [[nodiscard]] Register AllocReg();
+
+    [[nodiscard]] Register AllocLongReg();
 
     void FreeReg(Register reg);
 
@@ -97,19 +107,27 @@ public:
         return num_used_registers;
     }
 
+    [[nodiscard]] size_t NumUsedLongRegisters() const noexcept {
+        return num_used_long_registers;
+    }
+
 private:
     static constexpr size_t NUM_REGS = 4096;
     static constexpr size_t NUM_ELEMENTS = 4;
 
+    Register Define(IR::Inst& inst, bool is_long);
+
     Value Consume(IR::Inst& inst);
 
-    Id Alloc();
+    Id Alloc(bool is_long);
 
     void Free(Id id);
 
     EmitContext& ctx;
     size_t num_used_registers{};
+    size_t num_used_long_registers{};
     std::bitset<NUM_REGS> register_use{};
+    std::bitset<NUM_REGS> long_register_use{};
 };
 
 template <bool scalar, typename FormatContext>
@@ -121,9 +139,17 @@ auto FormatTo(FormatContext& ctx, Id id) {
         throw NotImplementedException("Spill emission");
     }
     if constexpr (scalar) {
-        return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
+        if (id.is_long != 0) {
+            return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
+        } else {
+            return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
+        }
     } else {
-        return fmt::format_to(ctx.out(), "R{}", id.index.Value());
+        if (id.is_long != 0) {
+            return fmt::format_to(ctx.out(), "D{}", id.index.Value());
+        } else {
+            return fmt::format_to(ctx.out(), "R{}", id.index.Value());
+        }
     }
 }
 
@@ -184,6 +210,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
             return fmt::format_to(ctx.out(), "{}", static_cast<u32>(value.imm_s32));
         case Shader::Backend::GLASM::Type::F32:
             return fmt::format_to(ctx.out(), "{}", Common::BitCast<u32>(value.imm_f32));
+        case Shader::Backend::GLASM::Type::F64:
+            break;
         }
         throw Shader::InvalidArgument("Invalid value type {}", value.type);
     }
 };
@@ -205,6 +233,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
             return fmt::format_to(ctx.out(), "{}", value.imm_s32);
         case Shader::Backend::GLASM::Type::F32:
             return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32));
+        case Shader::Backend::GLASM::Type::F64:
+            break;
         }
         throw Shader::InvalidArgument("Invalid value type {}", value.type);
     }
 };
@@ -226,6 +256,29 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
             return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_s32));
         case Shader::Backend::GLASM::Type::F32:
             return fmt::format_to(ctx.out(), "{}", value.imm_f32);
+        case Shader::Backend::GLASM::Type::F64:
+            break;
+        }
+        throw Shader::InvalidArgument("Invalid value type {}", value.type);
+    }
+};
+
+template <>
+struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
+    constexpr auto parse(format_parse_context& ctx) {
+        return ctx.begin();
+    }
+    template <typename FormatContext>
+    auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) {
+        switch (value.type) {
+        case Shader::Backend::GLASM::Type::Register:
+            return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
+        case Shader::Backend::GLASM::Type::U32:
+        case Shader::Backend::GLASM::Type::S32:
+        case Shader::Backend::GLASM::Type::F32:
+            break;
+        case Shader::Backend::GLASM::Type::F64:
+            return format_to(ctx.out(), "{}", value.imm_f64);
+        }
+        throw Shader::InvalidArgument("Invalid value type {}", value.type);
+    }
+};

From 3e10709091ea6d126474dbac2f2c8f0b6e9a0d3f Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 9 May 2021 17:57:57 -0400
Subject: [PATCH 353/770] glasm: Reimplement bitwise ops and BFI/BFE

---
 .../backend/glasm/emit_glasm_instructions.h   | 34 ++++----
 .../backend/glasm/emit_glasm_integer.cpp      | 82 ++++++++-----------
 .../glasm/emit_glasm_not_implemented.cpp      | 28 +------
 .../backend/glasm/emit_glasm_select.cpp       | 52 ++++++++++++
 4 files changed, 108 insertions(+), 88 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index ab1e08215..aef5f7663 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -185,10 +185,12 @@ void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register obj
 void EmitSelectU1(EmitContext& ctx, ScalarS32 cond,
ScalarS32 true_value, ScalarS32 false_value); void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); -void EmitSelectU32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); void EmitSelectU64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); -void EmitSelectF32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); @@ -313,15 +315,15 @@ void EmitShiftRightArithmetic64(EmitContext& ctx, Register base, Register shift) void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); -void EmitBitFieldInsert(EmitContext& ctx, ScalarS32 base, ScalarS32 insert, ScalarS32 offset, - ScalarS32 count); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count); void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, ScalarS32 count); void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, ScalarU32 count); -void EmitBitReverse32(EmitContext& ctx, ScalarS32 value); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitBitCount32(EmitContext& ctx, ScalarS32 value); -void EmitBitwiseNot32(EmitContext& ctx, ScalarS32 value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitFindSMsb32(EmitContext& ctx, ScalarS32 value); void EmitFindUMsb32(EmitContext& ctx, ScalarU32 value); void EmitSMin32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); @@ -330,16 +332,16 @@ void EmitSMax32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); void EmitUMax32(EmitContext& ctx, ScalarU32 a, ScalarU32 b); void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); -void EmitSLessThan(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitULessThan(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); -void EmitIEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitSLessThanEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitULessThanEqual(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); -void EmitSGreaterThan(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitUGreaterThan(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); -void EmitINotEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, ScalarS32 lhs, ScalarS32 rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, ScalarU32 lhs, ScalarU32 rhs); +void EmitSLessThan(EmitContext& ctx, 
IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); void EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); void EmitSharedAtomicSMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value); void EmitSharedAtomicUMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 7b88d6f02..2db05e62d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -87,34 +87,34 @@ void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b ctx.Add("XOR.S {}.x,{},{};", inst, a, b); } -void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 base, - [[maybe_unused]] ScalarS32 insert, [[maybe_unused]] ScalarS32 offset, - [[maybe_unused]] ScalarS32 count) { - throw NotImplementedException("GLASM instruction"); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, + ScalarS32 offset, ScalarS32 count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFI.S {},RC,{},{};", inst, insert, base); } -void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] ScalarS32 base, [[maybe_unused]] ScalarS32 offset, - [[maybe_unused]] ScalarS32 count) { - throw NotImplementedException("GLASM instruction"); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, + ScalarS32 count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFE.S {},RC,{};", inst, base); } -void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] ScalarU32 base, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] ScalarU32 count) { - throw NotImplementedException("GLASM instruction"); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, + ScalarU32 count) { + ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); + ctx.Add("BFE.U {},RC,{};", inst, base); } -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BFR {},{};", inst, value); } void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { throw NotImplementedException("GLASM instruction"); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM 
instruction"); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("NOT.S {},{};", inst, value); } void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { @@ -157,54 +157,44 @@ void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& throw NotImplementedException("GLASM instruction"); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLT.S {},{},{};", inst, lhs, rhs); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, - [[maybe_unused]] ScalarU32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLT.U {},{},{};", inst, lhs, rhs); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SEQ.S {},{},{};", inst, lhs, rhs); } -void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SLE.S {},{},{};", inst, lhs, rhs); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, - [[maybe_unused]] ScalarU32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SLE.U {},{},{};", inst, lhs, rhs); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGT.S {},{},{};", inst, lhs, rhs); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, - [[maybe_unused]] ScalarU32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGT.U {},{},{};", inst, lhs, rhs); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SNE.U {},{},{};", inst, lhs, rhs); } -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 lhs, - [[maybe_unused]] ScalarS32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { + ctx.Add("SGE.S {},{},{};", inst, lhs, rhs); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 lhs, - [[maybe_unused]] ScalarU32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { + ctx.Add("SGE.U {},{},{};", inst, lhs, rhs); } } // 
namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index f3baf33af..e94571f2d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -249,35 +249,11 @@ void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { NotImplemented(); } -void EmitSelectU1(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { +void EmitPackDouble2x32(EmitContext& ctx, Register value) { NotImplemented(); } -void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { - NotImplemented(); -} - -void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { - NotImplemented(); -} - -void EmitSelectU32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { - NotImplemented(); -} - -void EmitSelectU64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { - NotImplemented(); -} - -void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { - NotImplemented(); -} - -void EmitSelectF32(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value) { - NotImplemented(); -} - -void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value) { +void EmitUnpackDouble2x32(EmitContext& ctx, Register value) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index e69de29bb..8f9df8e23 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -0,0 +1,52 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
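+// Implementation note: in NV_gpu_program4, integer comparison instructions
+// write 0 (false) or -1 (true) per component, and CMP copies its second
+// operand where the condition is negative and its third elsewhere. A single
+// "CMP.S dst,cond,a,b" therefore implements select() for any 32-bit payload,
+// which is the idiom the implemented cases below rely on.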
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitSelectU1([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); +} + +void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, + [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM From 6705f56029d627941f56e36273fd4548a88e8b63 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 9 May 2021 18:49:27 -0400 Subject: [PATCH 354/770] emit_glasm: Implement more integer alu ops --- .../backend/glasm/emit_glasm_instructions.h | 22 +++---- .../backend/glasm/emit_glasm_integer.cpp | 66 +++++++++---------- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index aef5f7663..b19899624 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -302,15 +302,15 @@ void EmitIAdd64(EmitContext& ctx, Register a, Register b); void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitISub64(EmitContext& ctx, Register a, Register b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); -void EmitINeg32(EmitContext& ctx, ScalarS32 value); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitINeg64(EmitContext& ctx, Register value); -void EmitIAbs32(EmitContext& ctx, ScalarS32 value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitIAbs64(EmitContext& ctx, Register value); void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); void EmitShiftLeftLogical64(EmitContext& ctx, Register base, Register shift); 
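 // Convention: handlers that define a result take IR::Inst& so the emitter can
 // bind the destination register through Define/LongDefine; declarations
 // without it either produce no value or are not implemented yet.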
-void EmitShiftRightLogical32(EmitContext& ctx, ScalarU32 base, ScalarU32 shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); void EmitShiftRightLogical64(EmitContext& ctx, Register base, Register shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, ScalarS32 base, ScalarS32 shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); void EmitShiftRightArithmetic64(EmitContext& ctx, Register base, Register shift); void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); @@ -322,14 +322,14 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, ScalarU32 count); void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitBitCount32(EmitContext& ctx, ScalarS32 value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitFindSMsb32(EmitContext& ctx, ScalarS32 value); -void EmitFindUMsb32(EmitContext& ctx, ScalarU32 value); -void EmitSMin32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); -void EmitUMin32(EmitContext& ctx, ScalarU32 a, ScalarU32 b); -void EmitSMax32(EmitContext& ctx, ScalarS32 a, ScalarS32 b); -void EmitUMax32(EmitContext& ctx, ScalarU32 a, ScalarU32 b); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 2db05e62d..9d72af942 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -30,16 +30,16 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("MUL.S {}.x,{},{};", inst, a, b); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("MOV.S {},-{};", inst, value); } void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("ABS.S {},{};", inst, value); } void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { @@ -55,9 +55,8 @@ void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 base, - [[maybe_unused]] ScalarU32 shift) { - throw NotImplementedException("GLASM instruction"); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { + ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); } void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, @@ -65,9 +64,8 @@ void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] throw NotImplementedException("GLASM instruction"); } -void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 base, - [[maybe_unused]] ScalarS32 shift) { - throw NotImplementedException("GLASM instruction"); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { + ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); } void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, @@ -109,52 +107,48 @@ void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("BFR {},{};", inst, value); } -void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTC {},{};", inst, value); } void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("NOT.S {},{};", inst, value); } -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("BTFM.S {},{};", inst, value); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + ctx.Add("BTFM.U {},{};", inst, value); } -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 a, - [[maybe_unused]] ScalarS32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MIN.S {},{},{};", inst, a, b); } -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 a, - [[maybe_unused]] ScalarU32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MIN.U {},{},{};", inst, a, b); } -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 a, - [[maybe_unused]] ScalarS32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("MAX.S {},{},{};", inst, a, b); } -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 a, - [[maybe_unused]] ScalarU32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { + ctx.Add("MAX.U {},{},{};", inst, a, b); } -void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] ScalarS32 value, [[maybe_unused]] ScalarS32 min, - [[maybe_unused]] ScalarS32 max) { - throw NotImplementedException("GLASM 
instruction"); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.S {}.x,{},{};", ret, max, value); + ctx.Add("MAX.S {}.x,{},{};", ret, ret, min); } -void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] ScalarU32 value, [[maybe_unused]] ScalarU32 min, - [[maybe_unused]] ScalarU32 max) { - throw NotImplementedException("GLASM instruction"); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.U {}.x,{},{};", ret, max, value); + ctx.Add("MAX.U {}.x,{},{};", ret, ret, min); } void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { From b4953e79ee5eb9ed0918ce8696571240d7a07af8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 21:13:09 -0300 Subject: [PATCH 355/770] glasm: Add floating-point comparisons on GLASM --- .../glasm/emit_glasm_floating_point.cpp | 170 +++++++++--------- .../backend/glasm/emit_glasm_instructions.h | 54 +++--- .../glasm/emit_glasm_not_implemented.cpp | 12 -- 3 files changed, 116 insertions(+), 120 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 2b9a210aa..1139a22bd 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -10,6 +10,31 @@ namespace Shader::Backend::GLASM { +template +static void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, + std::string_view op, std::string_view type, bool ordered, + bool inequality = false) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); + if (ordered && inequality) { + ctx.Add("SEQ.{} RC.y,{},{};" + "SEQ.{} RC.z,{},{};" + "AND.U RC.x,RC.x,RC.y;" + "AND.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } else if (ordered) { + ctx.Add("SNE.S {}.x,RC.x,0;", ret); + } else { + ctx.Add("SNE.{} RC.y,{},{};" + "SNE.{} RC.z,{},{};" + "OR.U RC.x,RC.x,RC.y;" + "OR.U RC.x,RC.x,RC.z;" + "SNE.S {}.x,RC.x,0;", + type, lhs, lhs, type, rhs, rhs, ret); + } +} + void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); @@ -46,10 +71,8 @@ void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, Sca ctx.Add("MAD.F {}.x,{},{},{};", inst, a, b, c); } -void EmitFPFma64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] Register a, [[maybe_unused]] Register b, - [[maybe_unused]] Register c) { - throw NotImplementedException("GLASM instruction"); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { + ctx.LongAdd("MAD.F64 {}.x,{},{},{};", inst, a, b, c); } void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, @@ -57,9 +80,8 @@ void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a throw NotImplementedException("GLASM instruction"); } -void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, - [[maybe_unused]] Register b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, 
ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MAX.F64 {},{},{};", inst, a, b); } void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, @@ -67,9 +89,8 @@ void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a throw NotImplementedException("GLASM instruction"); } -void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, - [[maybe_unused]] Register b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MIN.F64 {},{},{};", inst, a, b); } void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -81,9 +102,8 @@ void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { ctx.Add("MUL.F {}.x,{},{};", inst, a, b); } -void EmitFPMul64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] Register a, [[maybe_unused]] Register b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { + ctx.LongAdd("MUL.F64 {}.x,{},{};", inst, a, b); } void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { @@ -215,13 +235,11 @@ void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Regist } void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { - const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SEQ.F {}.x,{},{};SNE.S {}.x,{},0;", ret, lhs, rhs, ret, ret); + Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); } -void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); } void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -229,14 +247,12 @@ void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Regi throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); } -void EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); } void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -244,14 +260,12 @@ void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Reg throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); } -void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) 
{ - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); } void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -259,14 +273,12 @@ void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] R throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); } -void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); } void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -275,13 +287,11 @@ void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Reg } void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { - const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SLT.F {}.x,{},{};SNE.S {}.x,{}.x,0;", ret, lhs, rhs, ret, ret); + Compare(ctx, inst, lhs, rhs, "SLT", "F", true); } -void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); } void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -289,14 +299,12 @@ void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] R throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F", false); } -void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); } void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -304,14 +312,12 @@ void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", true); } -void EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - 
throw NotImplementedException("GLASM instruction"); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); } void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -319,14 +325,12 @@ void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused] throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F", false); } -void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); } void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -335,13 +339,11 @@ void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused] } void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { - const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("SLE.F {}.x,{},{};SNE.S {}.x,{}.x,0;", ret, lhs, rhs, ret, ret); + Compare(ctx, inst, lhs, rhs, "SLE", "F", true); } -void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); } void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -349,14 +351,12 @@ void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unuse throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F", false); } -void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); } void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -364,14 +364,12 @@ void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unus throw NotImplementedException("GLASM instruction"); } -void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 lhs, - [[maybe_unused]] ScalarF32 rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", true); } -void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); } void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, @@ -379,14 +377,24 @@ void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_un throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] ScalarF32 lhs, [[maybe_unused]] ScalarF32 rhs) { +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F", false); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { + Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, - [[maybe_unused]] Register rhs) { - throw NotImplementedException("GLASM instruction"); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Compare(ctx, inst, value, value, "SNE", "F", true, false); +} + +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Compare(ctx, inst, value, value, "SNE", "F64", true, false); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index b19899624..26c97ce8e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -220,14 +220,14 @@ void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); -void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); void EmitFPMax32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); -void EmitFPMax64(EmitContext& ctx, Register a, Register b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPMin32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); -void EmitFPMin64(EmitContext& ctx, Register a, Register b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); -void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPNeg16(EmitContext& ctx, Register value); void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); @@ -260,43 +260,43 @@ void EmitFPTrunc32(EmitContext& ctx, ScalarF32 value); void EmitFPTrunc64(EmitContext& ctx, Register value); void 
EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPOrdEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPUnordEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPUnordEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPOrdNotEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPOrdNotEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPUnordNotEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPUnordNotEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPOrdLessThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPUnordLessThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPUnordLessThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPOrdGreaterThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPOrdGreaterThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); -void EmitFPUnordGreaterThan32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPUnordGreaterThan64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); -void EmitFPOrdLessThanEqual64(EmitContext& ctx, Register lhs, Register rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); -void 
EmitFPUnordLessThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs);
-void EmitFPUnordLessThanEqual64(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
 void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
-void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs);
-void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
 void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs);
-void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, ScalarF32 lhs, ScalarF32 rhs);
-void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Register lhs, Register rhs);
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs);
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs);
 void EmitFPIsNan16(EmitContext& ctx, Register value);
-void EmitFPIsNan32(EmitContext& ctx, ScalarF32 value);
-void EmitFPIsNan64(EmitContext& ctx, Register value);
+void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
+void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
 void EmitIAdd64(EmitContext& ctx, Register a, Register b);
 void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index e94571f2d..60eb123cf 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -281,18 +281,6 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) {
     NotImplemented();
 }
 
-void EmitFPIsNan16(EmitContext& ctx, Register value) {
-    NotImplemented();
-}
-
-void EmitFPIsNan32(EmitContext& ctx, ScalarF32 value) {
-    NotImplemented();
-}
-
-void EmitFPIsNan64(EmitContext& ctx, Register value) {
-    NotImplemented();
-}
-
 void EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) {
     NotImplemented();
 }

From 45ef62d3ba2512be3f2d445c88b85ca6c18a6fe2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Sun, 9 May 2021 21:15:07 -0300
Subject: [PATCH 356/770] glasm: Remove unnecessary new white space on Clamp GLASM

---
 .../backend/glasm/emit_glasm_integer.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index 9d72af942..c9386805a 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -141,14 +141,14 @@ void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
 void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    ctx.Add("MIN.S {}.x,{},{};", ret, max, value);
-    ctx.Add("MAX.S {}.x,{},{};", ret, ret, min);
+    ctx.Add("MIN.S {}.x,{},{};", ret, max, value);
+    
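// This MIN/MAX pair computes clamp(value, min, max) as max(min(value, max), min):
// MIN applies the upper bound first, then MAX applies the lower bound to the
// intermediate held in ret. Sketch with illustrative registers value=R0.x,
// min=R1.x, max=R2.x, ret=R3:
//   MIN.S R3.x,R2.x,R0.x;   # R3 = min(value, max)
//   MAX.S R3.x,R3.x,R1.x;   # R3 = clamped result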
ctx.Add("MAX.S {}.x,{},{};", ret, ret, min); } void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("MIN.U {}.x,{},{};", ret, max, value); - ctx.Add("MAX.U {}.x,{},{};", ret, ret, min); + ctx.Add("MIN.U {}.x,{},{};", ret, max, value); + ctx.Add("MAX.U {}.x,{},{};", ret, ret, min); } void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { From 99352741afba5c88ebd31d199bdd66dcafa29bb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 21:17:25 -0300 Subject: [PATCH 357/770] glasm: Remove duplicated Fp64 pack instructions on GLASM --- .../backend/glasm/emit_glasm_not_implemented.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 60eb123cf..3f224062f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -249,14 +249,6 @@ void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { NotImplemented(); } -void EmitPackDouble2x32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitUnpackDouble2x32(EmitContext& ctx, Register value) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } From 43a448d98d0d679c343eb81af5cedcd9be2112fa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 21:17:48 -0300 Subject: [PATCH 358/770] glasm: Add logical instructions on GLASM --- .../backend/glasm/emit_glasm_instructions.h | 8 ++++---- .../backend/glasm/emit_glasm_not_implemented.cpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 26c97ce8e..174eaaafa 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -437,10 +437,10 @@ void EmitGlobalAtomicMinF16x2(EmitContext& ctx); void EmitGlobalAtomicMinF32x2(EmitContext& ctx); void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -void EmitLogicalOr(EmitContext& ctx, ScalarS32 a, ScalarS32 b); -void EmitLogicalAnd(EmitContext& ctx, ScalarS32 a, ScalarS32 b); -void EmitLogicalXor(EmitContext& ctx, ScalarS32 a, ScalarS32 b); -void EmitLogicalNot(EmitContext& ctx, ScalarS32 value); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitConvertS16F16(EmitContext& ctx, Register value); void EmitConvertS16F32(EmitContext& ctx, Register value); void EmitConvertS16F64(EmitContext& ctx, Register value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 3f224062f..29eb75c6a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -572,20 +572,20 @@ void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { NotImplemented(); } -void 
EmitLogicalOr(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { - NotImplemented(); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("OR.S {},{},{};", inst, a, b); } -void EmitLogicalAnd(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { - NotImplemented(); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("AND.S {},{},{};", inst, a, b); } -void EmitLogicalXor(EmitContext& ctx, ScalarS32 a, ScalarS32 b) { - NotImplemented(); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + ctx.Add("XOR.S {},{},{};", inst, a, b); } -void EmitLogicalNot(EmitContext& ctx, ScalarS32 value) { - NotImplemented(); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + ctx.Add("SEQ.S {},{},0;", inst, value); } void EmitConvertS16F16(EmitContext& ctx, Register value) { From 7703d65f234a3032adef795037320cd30ef28a3a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 21:22:55 -0300 Subject: [PATCH 359/770] glasm: Add fp min/max insts and fix store for fp64 on GLASM --- .../backend/glasm/emit_glasm_floating_point.cpp | 14 ++++++-------- .../backend/glasm/emit_glasm_instructions.h | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 1139a22bd..aab506109 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -75,22 +75,20 @@ void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, Sca ctx.LongAdd("MAD.F64 {}.x,{},{},{};", inst, a, b, c); } -void EmitFPMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, - [[maybe_unused]] ScalarF32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MAX.F {}.x,{},{};", inst, a, b); } void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { - ctx.LongAdd("MAX.F64 {},{},{};", inst, a, b); + ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); } -void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 a, - [[maybe_unused]] ScalarF32 b) { - throw NotImplementedException("GLASM instruction"); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { + ctx.Add("MIN.F {}.x,{},{};", inst, a, b); } void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { - ctx.LongAdd("MIN.F64 {},{},{};", inst, a, b); + ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); } void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 174eaaafa..5d94f21a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -221,9 +221,9 @@ void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); -void EmitFPMax32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); +void 
EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); -void EmitFPMin32(EmitContext& ctx, ScalarF32 a, ScalarF32 b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); From ad61b47f80b96436ef675abcf1123668d9c1180d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 22:43:29 -0300 Subject: [PATCH 360/770] glasm: Add conversion instructions to GLASM --- .../backend/glasm/emit_glasm.cpp | 3 + .../glasm/emit_glasm_bitwise_conversion.cpp | 8 +- .../backend/glasm/emit_glasm_convert.cpp | 231 ++++++++++++++++++ .../glasm/emit_glasm_floating_point.cpp | 50 ++-- .../backend/glasm/emit_glasm_instructions.h | 122 ++++----- .../backend/glasm/emit_glasm_integer.cpp | 10 +- .../glasm/emit_glasm_not_implemented.cpp | 192 --------------- .../backend/glasm/reg_alloc.cpp | 4 + .../backend/glasm/reg_alloc.h | 13 + 9 files changed, 351 insertions(+), 282 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 9db6eb4a0..0e4b189c9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -59,6 +59,9 @@ struct RegWrapper { case Type::F32: ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); break; + case Type::U64: + ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); + break; case Type::F64: ctx.Add("MOV.F64 {}.x,{};", reg, value.imm_f64); break; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index eb6140954..a6c66b826 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -48,12 +48,12 @@ void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } -void EmitPackUint2x32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("PK64.U {}.x,{};", inst, value); } -void EmitUnpackUint2x32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.Add("UP64.U {}.xy,{}.x;", inst, value); } void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp index e69de29bb..ccdf1cbc8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp @@ -0,0 +1,231 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
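// The new file below funnels every EmitConvert* entry point through a single
// Convert helper that emits one CVT instruction. As a sketch with illustrative
// registers, an F32 -> S32 conversion under round-to-zero becomes:
//   CVT.S32.F32.TRUNC R0.x,R1.x;
// where the rounding suffix comes from the IR::FpRounding mapping that opens
// the file.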
+
+#include <string_view>
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+std::string_view FpRounding(IR::FpRounding fp_rounding) {
+    switch (fp_rounding) {
+    case IR::FpRounding::DontCare:
+        return "";
+    case IR::FpRounding::RN:
+        return ".ROUND";
+    case IR::FpRounding::RZ:
+        return ".TRUNC";
+    case IR::FpRounding::RM:
+        return ".FLR";
+    case IR::FpRounding::RP:
+        return ".CEIL";
+    }
+    throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding);
+}
+
+template <typename InputType>
+void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest,
+             std::string_view src, bool is_long_result) {
+    const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)};
+    const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)};
+    ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value);
+}
+} // Anonymous namespace
+
+void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "S16", "F16", false);
+}
+
+void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "S16", "F32", false);
+}
+
+void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "S16", "F64", false);
+}
+
+void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "S32", "F16", false);
+}
+
+void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "S32", "F32", false);
+}
+
+void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "S32", "F64", false);
+}
+
+void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "S64", "F16", true);
+}
+
+void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "S64", "F32", true);
+}
+
+void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "S64", "F64", true);
+}
+
+void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "U16", "F16", false);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "U16", "F32", false);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "U16", "F64", false);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "U32", "F16", false);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "U32", "F32", false);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "U32", "F64", false);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) {
+    Convert(ctx, inst, value, "U64", "F16", true);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) {
+    Convert(ctx, inst, value, "U64", "F32", true);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) {
+    Convert(ctx, inst, value, "U64", "F64", true);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) {
+    
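// The trailing bool of Convert is is_long_result: 64-bit destinations are
// allocated with ctx.reg_alloc.LongDefine(inst) so the result lands in a
// 64-bit-capable register, while everything narrower goes through
// Define(inst). The widening U32 -> U64 conversion here is one of the
// long-result cases: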
Convert(ctx, inst, value, "U64", "U32", true); +} + +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "U32", "U64", false); +} + +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F16", "F32", false); +} + +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "F16", false); +} + +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + Convert(ctx, inst, value, "F32", "F64", false); +} + +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + Convert(ctx, inst, value, "F64", "F32", true); +} + +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S8", false); +} + +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S16", false); +} + +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F16", "S32", false); +} + +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "S64", false); +} + +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U8", false); +} + +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U16", false); +} + +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F16", "U32", false); +} + +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F16", "U64", false); +} + +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S8", false); +} + +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S16", false); +} + +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F32", "S32", false); +} + +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "S64", false); +} + +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U8", false); +} + +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U16", false); +} + +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F32", "U32", false); +} + +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F32", "U64", false); +} + +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S8", true); +} + +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S16", true); +} + +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { + Convert(ctx, inst, value, "F64", "S32", true); +} + +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "S64", true); +} + +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U8", true); +} + +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, 
"F64", "U16", true); +} + +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { + Convert(ctx, inst, value, "F64", "U32", true); +} + +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { + Convert(ctx, inst, value, "F64", "U64", true); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index aab506109..2aee5a56c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -169,62 +169,68 @@ void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register throw NotImplementedException("GLASM instruction"); } -void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value, - [[maybe_unused]] ScalarF32 min_value, [[maybe_unused]] ScalarF32 max_value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("MIN.F {}.x,{},{};" + "MAX.F {}.x,{},{};", + ret, max_value, value, ret, ret, min_value); } -void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, - [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value) { + const Register ret{ctx.reg_alloc.LongDefine(inst)}; + ctx.Add("MIN.F64 {}.x,{},{};" + "MAX.F64 {}.x,{},{};", + ret, max_value, value, ret, ret, min_value); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("ROUND.F {}.x,{};", inst, value); } -void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); } void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("FLR.F {}.x,{};", inst, value); } -void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); } void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("CEIL.F {}.x,{};", 
inst, value); } -void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); } void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("TRUNC.F {}.x,{};", inst, value); } -void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { + ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); } void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 5d94f21a6..94843cc60 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -198,8 +198,8 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitPackUint2x32(EmitContext& ctx, Register value); -void EmitUnpackUint2x32(EmitContext& ctx, Register value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); void EmitPackFloat2x16(EmitContext& ctx, Register value); void EmitUnpackFloat2x16(EmitContext& ctx, Register value); void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); @@ -244,20 +244,22 @@ void EmitFPSaturate16(EmitContext& ctx, Register value); void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); void EmitFPSaturate64(EmitContext& ctx, Register value); void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); -void EmitFPClamp32(EmitContext& ctx, ScalarF32 value, ScalarF32 min_value, ScalarF32 max_value); -void EmitFPClamp64(EmitContext& ctx, Register value, Register min_value, Register max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, + ScalarF32 max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, + ScalarF64 max_value); void EmitFPRoundEven16(EmitContext& ctx, Register value); -void EmitFPRoundEven32(EmitContext& ctx, ScalarF32 value); -void EmitFPRoundEven64(EmitContext& ctx, Register value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); void EmitFPFloor16(EmitContext& ctx, Register value); -void EmitFPFloor32(EmitContext& ctx, ScalarF32 value); -void EmitFPFloor64(EmitContext& ctx, Register value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); void 
EmitFPCeil16(EmitContext& ctx, Register value); -void EmitFPCeil32(EmitContext& ctx, ScalarF32 value); -void EmitFPCeil64(EmitContext& ctx, Register value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); void EmitFPTrunc16(EmitContext& ctx, Register value); -void EmitFPTrunc32(EmitContext& ctx, ScalarF32 value); -void EmitFPTrunc64(EmitContext& ctx, Register value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); @@ -441,54 +443,54 @@ void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitConvertS16F16(EmitContext& ctx, Register value); -void EmitConvertS16F32(EmitContext& ctx, Register value); -void EmitConvertS16F64(EmitContext& ctx, Register value); -void EmitConvertS32F16(EmitContext& ctx, Register value); -void EmitConvertS32F32(EmitContext& ctx, Register value); -void EmitConvertS32F64(EmitContext& ctx, Register value); -void EmitConvertS64F16(EmitContext& ctx, Register value); -void EmitConvertS64F32(EmitContext& ctx, Register value); -void EmitConvertS64F64(EmitContext& ctx, Register value); -void EmitConvertU16F16(EmitContext& ctx, Register value); -void EmitConvertU16F32(EmitContext& ctx, Register value); -void EmitConvertU16F64(EmitContext& ctx, Register value); -void EmitConvertU32F16(EmitContext& ctx, Register value); -void EmitConvertU32F32(EmitContext& ctx, Register value); -void EmitConvertU32F64(EmitContext& ctx, Register value); -void EmitConvertU64F16(EmitContext& ctx, Register value); -void EmitConvertU64F32(EmitContext& ctx, Register value); -void EmitConvertU64F64(EmitContext& ctx, Register value); -void EmitConvertU64U32(EmitContext& ctx, Register value); -void EmitConvertU32U64(EmitContext& ctx, Register value); -void EmitConvertF16F32(EmitContext& ctx, Register value); -void EmitConvertF32F16(EmitContext& ctx, Register value); -void EmitConvertF32F64(EmitContext& ctx, Register value); -void EmitConvertF64F32(EmitContext& ctx, Register value); -void EmitConvertF16S8(EmitContext& ctx, Register value); -void EmitConvertF16S16(EmitContext& ctx, Register value); -void EmitConvertF16S32(EmitContext& ctx, Register value); -void EmitConvertF16S64(EmitContext& ctx, Register value); -void EmitConvertF16U8(EmitContext& ctx, Register value); -void EmitConvertF16U16(EmitContext& ctx, Register value); -void EmitConvertF16U32(EmitContext& ctx, Register value); -void EmitConvertF16U64(EmitContext& ctx, Register value); -void EmitConvertF32S8(EmitContext& ctx, Register value); -void EmitConvertF32S16(EmitContext& ctx, Register value); -void EmitConvertF32S32(EmitContext& ctx, Register value); -void EmitConvertF32S64(EmitContext& ctx, Register value); -void EmitConvertF32U8(EmitContext& ctx, Register value); -void EmitConvertF32U16(EmitContext& ctx, Register value); -void EmitConvertF32U32(EmitContext& ctx, Register value); -void EmitConvertF32U64(EmitContext& ctx, Register value); -void 
EmitConvertF64S8(EmitContext& ctx, Register value); -void EmitConvertF64S16(EmitContext& ctx, Register value); -void EmitConvertF64S32(EmitContext& ctx, Register value); -void EmitConvertF64S64(EmitContext& ctx, Register value); -void EmitConvertF64U8(EmitContext& ctx, Register value); -void EmitConvertF64U16(EmitContext& ctx, Register value); -void EmitConvertF64U32(EmitContext& ctx, Register value); -void EmitConvertF64U64(EmitContext& ctx, Register value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); 
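// These declarations all gain an IR::Inst& parameter for the same reason as
// the comparison family earlier in the series: the emitter binds the result
// register itself via reg_alloc.Define(inst)/LongDefine(inst), and operands
// narrow from plain Register to typed scalars. A matching definition from this
// patch, for reference:
//   void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
//       Convert(ctx, inst, value, "F64", "S32", true);
//   }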
+void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitBindlessImageSampleImplicitLod(EmitContext&); void EmitBindlessImageSampleExplicitLod(EmitContext&); void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index c9386805a..40f48a091 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -141,14 +141,16 @@ void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("MIN.S {}.x,{},{};", ret, max, value); - ctx.Add("MAX.S {}.x,{},{};", ret, ret, min); + ctx.Add("MIN.S {}.x,{},{};" + "MAX.S {}.x,{},{};", + ret, max, value, ret, ret, min); } void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("MIN.U {}.x,{},{};", ret, max, value); - ctx.Add("MAX.U {}.x,{},{};", ret, ret, min); + ctx.Add("MIN.U {}.x,{},{};" + "MAX.U {}.x,{},{};", + ret, max, value, ret, ret, min); } void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 29eb75c6a..ebdbbcf5f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -588,198 +588,6 @@ void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("SEQ.S {},{},0;", inst, value); } -void EmitConvertS16F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS16F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS16F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS32F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS32F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS32F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS64F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS64F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertS64F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU16F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU16F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU16F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU32F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU32F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU32F64(EmitContext& ctx, Register 
value) { - NotImplemented(); -} - -void EmitConvertU64F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU64F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU64F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU64U32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertU32U64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32F16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32F64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64F32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16S8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16S16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16S32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16S64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16U8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16U16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16U32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF16U64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32S8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32S16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32S32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32S64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32U8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32U16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32U32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF32U64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64S8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64S16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64S32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64S64(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64U8(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64U16(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64U32(EmitContext& ctx, Register value) { - NotImplemented(); -} - -void EmitConvertF64U64(EmitContext& ctx, Register value) { - NotImplemented(); -} - void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 82b627500..1a65a5e7d 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -39,6 +39,10 @@ Value RegAlloc::Consume(const IR::Value& value) { ret.type = Type::F32; ret.imm_f32 = value.F32(); break; + case IR::Type::U64: + ret.type = Type::U64; + ret.imm_u64 = value.U64(); + break; case IR::Type::F64: ret.type = Type::F64; ret.imm_f64 = value.F64(); diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h 
b/src/shader_recompiler/backend/glasm/reg_alloc.h
index f1899eae1..200c51610 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.h
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -27,6 +27,7 @@ enum class Type : u32 {
     U32,
     S32,
     F32,
+    U64,
     F64,
 };
 
@@ -55,6 +56,7 @@ struct Value {
         u32 imm_u32;
         s32 imm_s32;
         f32 imm_f32;
+        u64 imm_u64;
         f64 imm_f64;
     };
 
@@ -71,6 +73,8 @@ struct Value {
             return imm_s32 == rhs.imm_s32;
         case Type::F32:
             return Common::BitCast<u32>(imm_f32) == Common::BitCast<u32>(rhs.imm_f32);
+        case Type::U64:
+            return imm_u64 == rhs.imm_u64;
         case Type::F64:
             return Common::BitCast<u64>(imm_f64) == Common::BitCast<u64>(rhs.imm_f64);
         }
@@ -103,6 +107,10 @@ public:
     void FreeReg(Register reg);
 
+    void InvalidateConditionCodes() {
+        // This does nothing for now
+    }
+
     [[nodiscard]] size_t NumUsedRegisters() const noexcept {
         return num_used_registers;
     }
@@ -210,6 +218,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> {
         return fmt::format_to(ctx.out(), "{}", static_cast<u32>(value.imm_s32));
     case Shader::Backend::GLASM::Type::F32:
         return fmt::format_to(ctx.out(), "{}", Common::BitCast<u32>(value.imm_f32));
+    case Shader::Backend::GLASM::Type::U64:
     case Shader::Backend::GLASM::Type::F64:
         break;
     }
@@ -233,6 +242,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> {
         return fmt::format_to(ctx.out(), "{}", value.imm_s32);
     case Shader::Backend::GLASM::Type::F32:
         return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32));
+    case Shader::Backend::GLASM::Type::U64:
     case Shader::Backend::GLASM::Type::F64:
         break;
     }
@@ -256,6 +266,7 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
         return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_s32));
     case Shader::Backend::GLASM::Type::F32:
         return fmt::format_to(ctx.out(), "{}", value.imm_f32);
+    case Shader::Backend::GLASM::Type::U64:
     case Shader::Backend::GLASM::Type::F64:
         break;
     }
@@ -277,6 +288,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
     case Shader::Backend::GLASM::Type::S32:
     case Shader::Backend::GLASM::Type::F32:
         break;
+    case Shader::Backend::GLASM::Type::U64:
+        return format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
     case Shader::Backend::GLASM::Type::F64:
         return format_to(ctx.out(), "{}", value.imm_f64);
     }

From 80813b1d144a7f0f11047e7348620b720def93a9 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 9 May 2021 22:01:03 -0400
Subject: [PATCH 361/770] glasm: Implement storage atomic ops

---
 .../backend/glasm/emit_glasm.cpp | 13 +
 .../backend/glasm/emit_glasm_atomic.cpp | 291 ++++++++++++++++++
 .../backend/glasm/emit_glasm_instructions.h | 108 +++----
 .../glasm/emit_glasm_not_implemented.cpp | 251 ---------------
 4 files changed, 358 insertions(+), 305 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 0e4b189c9..e6e065e7f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -149,6 +149,18 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
     }
     throw LogicError("Invalid opcode {}", inst->GetOpcode());
 }
+
+void SetupOptions(std::string& header, Info info) {
+    if (info.uses_int64_bit_atomics) {
+        header += "OPTION NV_shader_atomic_int64;";
+    }
+    if (info.uses_atomic_f32_add) {
+        header += "OPTION NV_shader_atomic_float;";
+    }
+    if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
+        header += "OPTION NV_shader_atomic_fp16_vector;";
+    }
+}
 } // Anonymous namespace
 
 std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) {
@@ -160,6 +172,7 @@ std::string EmitGLASM(const 
Profile&, IR::Program& program, Bindings&) {
     }
     std::string header = "!!NVcp5.0\n"
                          "OPTION NV_internal;";
+    SetupOptions(header, program.info);
     switch (program.stage) {
     case Stage::Compute:
         header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
index e69de29bb..fe44c3d15 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
@@ -0,0 +1,291 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+namespace {
+void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
+               std::string_view then_expr, std::string_view else_expr = {}) {
+    // Operate on bindless SSBO, call the expression with bounds checking
+    // address = c[binding].xy
+    // length = c[binding].z
+    const u32 sb_binding{binding.U32()};
+    ctx.Add("PK64.U DC,c[{}];"           // pointer = address
+            "CVT.U64.U32 DC.z,{};"       // offset = uint64_t(offset)
+            "ADD.U64 DC.x,DC.x,DC.z;"    // pointer += offset
+            "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
+            sb_binding, offset, offset, sb_binding);
+    if (else_expr.empty()) {
+        ctx.Add("IF NE.x;{}ENDIF;", then_expr);
+    } else {
+        ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
+    }
+}
+
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+          ValueType value, std::string_view operation, std::string_view size) {
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    StorageOp(ctx, binding, offset,
+              fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+}
+} // namespace
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void 
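// Taken together, StorageOp and Atom above expand each 32-bit storage atomic
// into a bounds-checked sequence. Sketch for EmitStorageAtomicIAdd32 with
// illustrative binding 0, offset in R0.x, value in R1.x and result R2:
//   PK64.U DC,c[0];            # DC = SSBO base address (64-bit)
//   CVT.U64.U32 DC.z,R0.x;     # widen the byte offset
//   ADD.U64 DC.x,DC.x,DC.z;    # pointer = base + offset
//   SLT.U.CC RC.x,R0.x,c[0].z; # in-bounds test: offset < length
//   IF NE.x;ATOM.ADD.U32 R2,R1.x,DC.x;ENDIF;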
EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, 
inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 94843cc60..817001afb 
100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -356,60 +356,60 @@ void EmitSharedAtomicOr32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 void EmitSharedAtomicXor32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); void EmitSharedAtomicExchange32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Register value); -void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarS32 value); -void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarS32 value); -void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value); -void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, ScalarU32 value); -void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, Register value); -void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarF32 value); -void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, - Register value); -void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicMaxF32x2(EmitContext& 
ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); void EmitGlobalAtomicIAdd32(EmitContext& ctx); void EmitGlobalAtomicSMin32(EmitContext& ctx); void EmitGlobalAtomicUMin32(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index ebdbbcf5f..85110bcc9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -321,257 +321,6 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Regi NotImplemented(); } -void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarS32 value) { - NotImplemented(); -} - -void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarS32 value) { - NotImplemented(); -} - -void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, Register value) { - 
NotImplemented(); -} - -void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - ScalarF32 value) { - NotImplemented(); -} - -void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Register value) { - NotImplemented(); -} - -void EmitGlobalAtomicIAdd32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMin32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMin32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMax32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMax32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicInc32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicDec32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAnd32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicOr32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicXor32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicExchange32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicIAdd64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMin64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMin64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMax64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMax64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicInc64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicDec64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAnd64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicOr64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicXor64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicExchange64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMinF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { - NotImplemented(); -} - void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("OR.S {},{},{};", inst, a, b); } From 8eb72ff0dc3eb428c28b578ffb3912c1bd1c42dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 01:33:24 -0300 Subject: [PATCH 362/770] glasm: Fix moving U64 immediates to registers in GLASM --- 
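Note: a 64-bit immediate spilled to a register has to land in a long register pair, so the wrapper below treats U64 the same as F64 when allocating. A minimal standalone sketch of that predicate (hypothetical Type enum and helper name, mirroring the diff):

enum class Type { Register, U32, S32, F32, U64, F64 };

// Any 64-bit immediate, integer or floating-point, needs a long register.
constexpr bool NeedsLongRegister(Type type) {
    return type == Type::F64 || type == Type::U64;
}

static_assert(NeedsLongRegister(Type::U64) && NeedsLongRegister(Type::F64));
static_assert(!NeedsLongRegister(Type::U32));

The reg_alloc.h hunk also qualifies the format_to calls as fmt::format_to, presumably to keep them unambiguous where std::format_to is also visible.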
src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 ++- src/shader_recompiler/backend/glasm/reg_alloc.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index e6e065e7f..8e5d575a9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -43,7 +43,8 @@ struct RegWrapper { RegWrapper(EmitContext& ctx, Value value) : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { if (allocated) { - reg = value.type == Type::F64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + const bool is_long{value.type == Type::F64 || value.type == Type::U64}; + reg = is_long ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); } else { reg = Register{value}; } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 200c51610..8df73ca18 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -289,9 +289,9 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { case Shader::Backend::GLASM::Type::F32: break; case Shader::Backend::GLASM::Type::U64: - return format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); + return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); case Shader::Backend::GLASM::Type::F64: - return format_to(ctx.out(), "{}", value.imm_f64); + return fmt::format_to(ctx.out(), "{}", value.imm_f64); } throw Shader::InvalidArgument("Invalid value type {}", value.type); } From 6237300e3605c0b12fb65e2a8818487ec2cb4580 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 01:37:30 -0300 Subject: [PATCH 363/770] glasm: Fix clamps so the min value has priority on NaN in GLASM --- .../glasm/emit_glasm_floating_point.cpp | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 2aee5a56c..2e1c7d55f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -9,11 +9,10 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { - +namespace { template <typename InputType> -static void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, - std::string_view op, std::string_view type, bool ordered, - bool inequality = false) { +void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, + std::string_view type, bool ordered, bool inequality = false) { const Register ret{ctx.reg_alloc.Define(inst)}; ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); if (ordered && inequality) { @@ -35,6 +34,16 @@ static void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType r } } +template <typename InputType> +void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, + InputType max_value) { + // Call MAX first to properly clamp nan to min_value instead + ctx.Add("MAX.F {}.x,{},{};" + "MIN.F {}.x,{},{};", + ret, min_value, value, ret, ret, max_value); +} +} // Anonymous namespace + void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); @@ -171,18 +180,12 @@ void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register void
EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, ScalarF32 max_value) { - const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("MIN.F {}.x,{},{};" - "MAX.F {}.x,{},{};", - ret, max_value, value, ret, ret, min_value); + Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value); } void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, ScalarF64 max_value) { - const Register ret{ctx.reg_alloc.LongDefine(inst)}; - ctx.Add("MIN.F64 {}.x,{},{};" - "MAX.F64 {}.x,{},{};", - ret, max_value, value, ret, ret, min_value); + Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { From 0839e46736858986b422b2ffb163ded59c16421f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 01:41:07 -0300 Subject: [PATCH 364/770] glasm: Implement SelectU64 on GLASM --- .../backend/glasm/emit_glasm_instructions.h | 3 ++- .../backend/glasm/emit_glasm_select.cpp | 21 ++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 817001afb..52420c3c9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -187,7 +187,8 @@ void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, Scalar void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); -void EmitSelectU64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value); void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index 8f9df8e23..cfde86047 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -29,9 +29,24 @@ void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 t ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); } -void EmitSelectU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, - [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { - throw NotImplementedException("GLASM instruction"); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, + Register false_value) { + ctx.reg_alloc.InvalidateConditionCodes(); + const Register ret{ctx.reg_alloc.LongDefine(inst)}; + if (ret == true_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(EQ.x),{};", + cond, ret, false_value); + } else if (ret == false_value) { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, true_value); + } else { + ctx.Add("MOV.S.CC RC.x,{};" + "MOV.U64 {}.x(EQ.x),{};" + "MOV.U64 {}.x(NE.x),{};", + cond, ret, false_value, ret, true_value); + } } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, From 
deda89372f78dc78b37e941bf86e3026708e3ea2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:47:31 -0300 Subject: [PATCH 365/770] glasm: Fix register allocation when moving immediate on GLASM --- .../backend/glasm/emit_glasm.cpp | 50 +++++++++--- .../backend/glasm/reg_alloc.cpp | 77 +++++++++++-------- .../backend/glasm/reg_alloc.h | 10 ++- 3 files changed, 92 insertions(+), 45 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 8e5d575a9..ad27b8b06 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -39,14 +39,16 @@ struct Identity { }; template -struct RegWrapper { - RegWrapper(EmitContext& ctx, Value value) - : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { - if (allocated) { +class RegWrapper { +public: + RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { + const Value value{reg_alloc.Peek(ir_value)}; + if (value.type == Type::Register) { + inst = ir_value.InstRecursive(); + reg = Register{value}; + } else { const bool is_long{value.type == Type::F64 || value.type == Type::U64}; reg = is_long ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); - } else { - reg = Register{value}; } switch (value.type) { case Type::Register: @@ -68,8 +70,11 @@ struct RegWrapper { break; } } + ~RegWrapper() { - if (allocated) { + if (inst) { + reg_alloc.Unref(*inst); + } else { reg_alloc.FreeReg(reg); } } @@ -78,19 +83,42 @@ struct RegWrapper { return std::conditional_t{Value{reg}}; } +private: RegAlloc& reg_alloc; + IR::Inst* inst{}; Register reg{}; - bool allocated{}; +}; + +template +class ValueWrapper { +public: + ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) + : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} + + ~ValueWrapper() { + if (!ir_value.IsImmediate()) { + reg_alloc.Unref(*ir_value.InstRecursive()); + } + } + + ArgType Extract() { + return value; + } + +private: + RegAlloc& reg_alloc; + const IR::Value& ir_value; + ArgType value; }; template auto Arg(EmitContext& ctx, const IR::Value& arg) { if constexpr (std::is_same_v) { - return RegWrapper{ctx, ctx.reg_alloc.Consume(arg)}; + return RegWrapper{ctx, arg}; } else if constexpr (std::is_same_v) { - return RegWrapper{ctx, ctx.reg_alloc.Consume(arg)}; + return RegWrapper{ctx, arg}; } else if constexpr (std::is_base_of_v) { - return Identity{ArgType{ctx.reg_alloc.Consume(arg)}}; + return ValueWrapper{ctx, arg}; } else if constexpr (std::is_same_v) { return Identity{arg}; } else if constexpr (std::is_same_v) { diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 1a65a5e7d..f556f3aee 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -21,10 +21,40 @@ Register RegAlloc::LongDefine(IR::Inst& inst) { return Define(inst, true); } +Value RegAlloc::Peek(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive()); +} + Value RegAlloc::Consume(const IR::Value& value) { - if (!value.IsImmediate()) { - return Consume(*value.InstRecursive()); + return value.IsImmediate() ? 
MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +void RegAlloc::Unref(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition()); } +} + +Register RegAlloc::AllocReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(false); + return ret; +} + +Register RegAlloc::AllocLongReg() { + Register ret; + ret.type = Type::Register; + ret.id = Alloc(true); + return ret; +} + +void RegAlloc::FreeReg(Register reg) { + Free(reg.id); +} + +Value RegAlloc::MakeImm(const IR::Value& value) { Value ret; switch (value.Type()) { case IR::Type::U1: @@ -53,45 +83,26 @@ Value RegAlloc::Consume(const IR::Value& value) { return ret; } -Register RegAlloc::AllocReg() { - Register ret; - ret.type = Type::Register; - ret.id = Alloc(false); - return ret; -} - -Register RegAlloc::AllocLongReg() { - Register ret; - ret.type = Type::Register; - ret.id = Alloc(true); - return ret; -} - -void RegAlloc::FreeReg(Register reg) { - Free(reg.id); -} - Register RegAlloc::Define(IR::Inst& inst, bool is_long) { - const Id id{Alloc(is_long)}; - inst.SetDefinition(id); - Register ret; - ret.type = Type::Register; - ret.id = id; - return ret; + inst.SetDefinition(Alloc(is_long)); + return Register{PeekInst(inst)}; } -Value RegAlloc::Consume(IR::Inst& inst) { - const Id id{inst.Definition()}; - inst.DestructiveRemoveUsage(); - if (!inst.HasUses()) { - Free(id); - } +Value RegAlloc::PeekInst(IR::Inst& inst) { Value ret; ret.type = Type::Register; - ret.id = id; + ret.id = inst.Definition(); return ret; } +Value RegAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition()); + } + return PeekInst(inst); +} + Id RegAlloc::Alloc(bool is_long) { size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; std::bitset& use{is_long ? 
long_register_use : register_use}; diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 8df73ca18..5742436cf 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -99,8 +99,12 @@ public: Register LongDefine(IR::Inst& inst); + [[nodiscard]] Value Peek(const IR::Value& value); + Value Consume(const IR::Value& value); + void Unref(IR::Inst& inst); + [[nodiscard]] Register AllocReg(); [[nodiscard]] Register AllocLongReg(); @@ -123,9 +127,13 @@ private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; + Value MakeImm(const IR::Value& value); + Register Define(IR::Inst& inst, bool is_long); - Value Consume(IR::Inst& inst); + Value PeekInst(IR::Inst& inst); + + Value ConsumeInst(IR::Inst& inst); Id Alloc(bool is_long); From 3f00a2ad3f4d5c89599929080b1f6efaf394b62f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:47:57 -0300 Subject: [PATCH 366/770] glasm: Properly clamp Fp64 on GLASM --- .../backend/glasm/emit_glasm_floating_point.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 2e1c7d55f..84028e01a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -36,11 +36,11 @@ void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std template void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, - InputType max_value) { + InputType max_value, std::string_view type) { // Call MAX first to properly clamp nan to min_value instead - ctx.Add("MAX.F {}.x,{},{};" - "MIN.F {}.x,{},{};", - ret, min_value, value, ret, ret, max_value); + ctx.Add("MAX.{} {}.x,{},{};" + "MIN.{} {}.x,{}.x,{};", + type, ret, min_value, value, type, ret, ret, max_value); } } // Anonymous namespace @@ -180,12 +180,12 @@ void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, ScalarF32 max_value) { - Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value); + Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); } void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, ScalarF64 max_value) { - Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value); + Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { From 3b6a632237e2f8388f2591d54fb31bebdc2a0ade Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:55:09 -0300 Subject: [PATCH 367/770] shader: Add floating-point rounding to I2F --- .../frontend/ir/ir_emitter.cpp | 60 ++++++++++--------- .../frontend/ir/ir_emitter.h | 10 ++-- .../integer_floating_point_conversion.cpp | 7 ++- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 354d72c9b..ce6c9af07 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1622,84 +1622,86 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, 
const F16F32F64& v return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); } -F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { switch (dest_bitsize) { case 16: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF16S8, value); + return Inst(Opcode::ConvertF16S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF16S16, value); + return Inst(Opcode::ConvertF16S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF16S32, value); + return Inst(Opcode::ConvertF16S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF16S64, value); + return Inst(Opcode::ConvertF16S64, Flags{control}, value); } break; case 32: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF32S8, value); + return Inst(Opcode::ConvertF32S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF32S16, value); + return Inst(Opcode::ConvertF32S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF32S32, value); + return Inst(Opcode::ConvertF32S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF32S64, value); + return Inst(Opcode::ConvertF32S64, Flags{control}, value); } break; case 64: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF64S8, value); + return Inst(Opcode::ConvertF64S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF64S16, value); + return Inst(Opcode::ConvertF64S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF64S32, value); + return Inst(Opcode::ConvertF64S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF64S64, value); + return Inst(Opcode::ConvertF64S64, Flags{control}, value); } break; } throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { switch (dest_bitsize) { case 16: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF16U8, value); + return Inst(Opcode::ConvertF16U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF16U16, value); + return Inst(Opcode::ConvertF16U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF16U32, value); + return Inst(Opcode::ConvertF16U32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF16U64, value); + return Inst(Opcode::ConvertF16U64, Flags{control}, value); } break; case 32: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF32U8, value); + return Inst(Opcode::ConvertF32U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF32U16, value); + return Inst(Opcode::ConvertF32U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF32U32, value); + return Inst(Opcode::ConvertF32U32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF32U64, value); + return Inst(Opcode::ConvertF32U64, Flags{control}, value); } break; case 64: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF64U8, value); + return Inst(Opcode::ConvertF64U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF64U16, value); + return Inst(Opcode::ConvertF64U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF64U32, value); + return Inst(Opcode::ConvertF64U32, Flags{control}, value); case 64: - 
return Inst(Opcode::ConvertF64U64, value); + return Inst(Opcode::ConvertF64U64, Flags{control}, value); } break; } @@ -1707,9 +1709,9 @@ F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const } F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, - const Value& value) { - return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value) - : ConvertUToF(dest_bitsize, src_bitsize, value); + const Value& value, FpControl control) { + return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) + : ConvertUToF(dest_bitsize, src_bitsize, value, control); } U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4e614d424..fd41b7e89 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -291,12 +291,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, - const Value& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, - const Value& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, - const Value& value); + const Value& value, FpControl control = {}); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 3c233597f..d6224d5cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -99,7 +99,12 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 
64 : 32}; const int dst_bitsize{BitSize(i2f.float_format)}; - IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive From 5b18a12df2116fcbb4bf10a2becd6cb404af0968 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:55:33 -0300 Subject: [PATCH 368/770] glasm: Implement IAbs64 and INeg64 on GLASM --- .../backend/glasm/emit_glasm_instructions.h | 4 ++-- .../backend/glasm/emit_glasm_integer.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 52420c3c9..d65a474f2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -306,9 +306,9 @@ void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitISub64(EmitContext& ctx, Register a, Register b); void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitINeg64(EmitContext& ctx, Register value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitIAbs64(EmitContext& ctx, Register value); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); void EmitShiftLeftLogical64(EmitContext& ctx, Register base, Register shift); void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 40f48a091..2be91ccfd 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -34,16 +34,16 @@ void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("MOV.S {},-{};", inst, value); } -void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.S64 {},-{};", inst, value); } void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("ABS.S {},{};", inst, value); } -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, Register value) { + ctx.LongAdd("MOV.S64 {},|{}|;", inst, value); } void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { From decda4a2c7e5dc6cce16f359f30bcf320c9dcf00 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 04:05:31 -0300 Subject: [PATCH 369/770] glasm: Add MUFU instructions to GLASM --- .../glasm/emit_glasm_floating_point.cpp | 29 ++++++++++--------- .../backend/glasm/emit_glasm_instructions.h | 14 ++++----- 2 files changed, 22 insertions(+), 21 
deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 84028e01a..15db6618f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -125,40 +125,41 @@ void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); } -void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("SIN {}.x,{};", inst, value); } -void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("COS {}.x,{};", inst, value); } -void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("EX2 {}.x,{};", inst, value); } -void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("LG2 {}.x,{};", inst, value); } -void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RCP {}.x,{};", inst, value); } void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + ctx.Add("RSQ {}.x,{};", inst, value); } void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { throw NotImplementedException("GLASM instruction"); } -void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("RSQ {}.x,{};RCP {}.x,{}.x;", ret, value, ret, ret); } void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index d65a474f2..1bbd02022 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -232,15 +232,15 @@ void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); void EmitFPNeg16(EmitContext& ctx, Register value); void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); -void EmitFPSin(EmitContext& ctx, ScalarF32 value); -void EmitFPCos(EmitContext& ctx, ScalarF32 value); -void EmitFPExp2(EmitContext& ctx, ScalarF32 value); -void EmitFPLog2(EmitContext& ctx, ScalarF32 value); -void 
EmitFPRecip32(EmitContext& ctx, ScalarF32 value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); void EmitFPRecip64(EmitContext& ctx, Register value); -void EmitFPRecipSqrt32(EmitContext& ctx, ScalarF32 value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); void EmitFPRecipSqrt64(EmitContext& ctx, Register value); -void EmitFPSqrt(EmitContext& ctx, ScalarF32 value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); void EmitFPSaturate16(EmitContext& ctx, Register value); void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); void EmitFPSaturate64(EmitContext& ctx, Register value); From c4fd6b55bc9acd06b2fc89f84fd175d78e14110a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 18:21:28 -0300 Subject: [PATCH 370/770] glasm: Implement shuffle and vote instructions on GLASM --- .../backend/glasm/emit_context.cpp | 17 +++ .../backend/glasm/emit_context.h | 2 + .../backend/glasm/emit_glasm.cpp | 6 + .../backend/glasm/emit_glasm_instructions.h | 28 ++--- .../glasm/emit_glasm_not_implemented.cpp | 84 +------------ .../backend/glasm/emit_glasm_warp.cpp | 118 ++++++++++++++++++ .../backend/spirv/emit_context.cpp | 2 +- .../backend/spirv/emit_spirv.cpp | 4 +- .../ir_opt/collect_shader_info_pass.cpp | 4 +- src/shader_recompiler/shader_info.h | 1 + 10 files changed, 166 insertions(+), 100 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 9f839f3bf..f9d83dd91 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -25,6 +25,23 @@ EmitContext::EmitContext(IR::Program& program) { if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); } + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: + stage_name = "primitive"; + break; + case Stage::Fragment: + stage_name = "fragment"; + break; + case Stage::Compute: + stage_name = "compute"; + break; + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 37663c1c8..4efe42ada 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -45,6 +45,8 @@ public: std::string code; RegAlloc reg_alloc{*this}; + + std::string_view stage_name = "invalid"; }; } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index ad27b8b06..8b42cbf79 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -189,6 +189,12 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "OPTION NV_shader_atomic_fp16_vector;"; } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask) { + 
header += "OPTION NV_shader_thread_group;"; + } + if (info.uses_subgroup_shuffles) { + header += "OPTION NV_shader_thread_shuffle;"; + } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 1bbd02022..75613571f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -584,24 +584,24 @@ void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& ind ScalarU32 value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, ScalarU32 value); -void EmitLaneId(EmitContext& ctx); -void EmitVoteAll(EmitContext& ctx, ScalarS32 pred); -void EmitVoteAny(EmitContext& ctx, ScalarS32 pred); -void EmitVoteEqual(EmitContext& ctx, ScalarS32 pred); -void EmitSubgroupBallot(EmitContext& ctx, ScalarS32 pred); -void EmitSubgroupEqMask(EmitContext& ctx); -void EmitSubgroupLtMask(EmitContext& ctx); -void EmitSubgroupLeMask(EmitContext& ctx); -void EmitSubgroupGtMask(EmitContext& ctx); -void EmitSubgroupGeMask(EmitContext& ctx); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask); + const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask); + const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask); + const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask); + const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); void EmitDPdxFine(EmitContext& ctx, ScalarF32 op_a); void EmitDPdyFine(EmitContext& ctx, ScalarF32 op_a); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 85110bcc9..3c0a74e3c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -21,9 +21,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitVoid(EmitContext& ctx) { - NotImplemented(); -} +void EmitVoid(EmitContext&) {} void EmitBranch(EmitContext& ctx) { NotImplemented(); @@ -636,84 +634,4 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value NotImplemented(); } -void EmitLaneId(EmitContext& ctx) 
{ - NotImplemented(); -} - -void EmitVoteAll(EmitContext& ctx, ScalarS32 pred) { - NotImplemented(); -} - -void EmitVoteAny(EmitContext& ctx, ScalarS32 pred) { - NotImplemented(); -} - -void EmitVoteEqual(EmitContext& ctx, ScalarS32 pred) { - NotImplemented(); -} - -void EmitSubgroupBallot(EmitContext& ctx, ScalarS32 pred) { - NotImplemented(); -} - -void EmitSubgroupEqMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupLtMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupLeMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupGtMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupGeMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, - ScalarU32 clamp, ScalarU32 segmentation_mask) { - NotImplemented(); -} - -void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle) { - NotImplemented(); -} - -void EmitDPdxFine(EmitContext& ctx, ScalarF32 op_a) { - NotImplemented(); -} - -void EmitDPdyFine(EmitContext& ctx, ScalarF32 op_a) { - NotImplemented(); -} - -void EmitDPdxCoarse(EmitContext& ctx, ScalarF32 op_a) { - NotImplemented(); -} - -void EmitDPdyCoarse(EmitContext& ctx, ScalarF32 op_a) { - NotImplemented(); -} - } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index e69de29bb..37eb577cd 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGALL.S {}.x,{};", inst, pred); +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGANY.S {}.x,{};", inst, pred); +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGEQ.S {}.x,{};", inst, pred); +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { + ctx.Add("TGBALLOT {}.x,{};", inst, pred); +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); +} + +static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask, + std::string_view op) { + std::string mask; + if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { + mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); + } else { + mask = "RC"; + ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", + ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, + ScalarU32{ctx.reg_alloc.Consume(clamp)}); + } + const Register value_ret{ctx.reg_alloc.Define(inst)}; + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, bounds_ret, value, index, mask, value_ret, bounds_ret); + in_bounds->Invalidate(); + } else { + ctx.Add("SHF{}.U {},{},{},{};" + "MOV.U {}.x,{}.y;", + op, value_ret, value, index, mask, value_ret, value_ret); + } +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, + const IR::Value& clamp, const IR::Value& segmentation_mask) { + Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); +} + +void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitDPdxFine(EmitContext&, 
ScalarF32) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitDPdyFine(EmitContext&, ScalarF32) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitDPdxCoarse(EmitContext&, ScalarF32) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitDPdyCoarse(EmitContext&, ScalarF32) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index be88b76f7..9759591bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1168,7 +1168,7 @@ void EmitContext::DefineInputs(const Info& info) { subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); } - if (info.uses_subgroup_invocation_id || + if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || (profile.warp_size_potentially_larger_than_guest && (info.uses_subgroup_vote || info.uses_subgroup_mask))) { subgroup_local_invocation_id = diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0681dfd16..2dad87e87 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -318,7 +318,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } - if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { + if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || + info.uses_subgroup_shuffles) && + profile.support_vote) { ctx.AddExtension("SPV_KHR_shader_ballot"); ctx.AddCapability(spv::Capability::SubgroupBallotKHR); if (!profile.warp_size_potentially_larger_than_guest) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 13b793d57..ea08aacc3 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -504,11 +504,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_is_helper_invocation = true; break; case IR::Opcode::LaneId: + info.uses_subgroup_invocation_id = true; + break; case IR::Opcode::ShuffleIndex: case IR::Opcode::ShuffleUp: case IR::Opcode::ShuffleDown: case IR::Opcode::ShuffleButterfly: - info.uses_subgroup_invocation_id = true; + info.uses_subgroup_shuffles = true; break; case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a50a9a18c..d6c32fbe5 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -116,6 +116,7 @@ struct Info { bool uses_sample_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; + bool uses_subgroup_shuffles{}; std::array uses_patches{}; std::array input_generics{}; From 70fbede213bfadfc4015b3227e57fca34bea46eb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 19:20:15 -0300 Subject: [PATCH 371/770] glasm: Review all GLASM insts to be aware of register aliasing --- .../backend/glasm/emit_glasm_composite.cpp | 19 +++++++-- .../glasm/emit_glasm_floating_point.cpp | 8 
++-- .../backend/glasm/emit_glasm_integer.cpp | 42 +++++++++++++------ .../backend/glasm/emit_glasm_select.cpp | 2 +- 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index 94dc5019d..22321f386 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -41,10 +41,23 @@ template <typename ObjectType> void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, u32 index, char type) { const Register ret{ctx.reg_alloc.Define(inst)}; - if (ret != composite) { - ctx.Add("MOV.{} {},{};", type, ret, composite); + const char swizzle{"xyzw"[index]}; + if (ret != composite && ret == object) { + // The object is aliased with the return value, so we have to use a temporary to insert + ctx.Add("MOV.{} RC,{};" + "MOV.{} RC.{},{};" + "MOV.{} {},RC;", + type, composite, type, swizzle, object, type, ret); + } else if (ret != composite) { + // The input composite is not aliased with the return value, so we have to copy it + // beforehand. But the insert object is not aliased with the return value, so we don't have + // to worry about that + ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object); + } else { + // The return value is aliased, so we can just insert the object; it doesn't matter if + // it's aliased + ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); } - ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], object); } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index 15db6618f..d2c324ad6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -38,9 +38,9 @@ template <typename InputType> void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, InputType max_value, std::string_view type) { // Call MAX first to properly clamp nan to min_value instead - ctx.Add("MAX.{} {}.x,{},{};" - "MIN.{} {}.x,{}.x,{};", - type, ret, min_value, value, type, ret, ret, max_value); + ctx.Add("MAX.{} RC.x,{},{};" + "MIN.{} {}.x,RC.x,{};", + type, min_value, value, type, ret, max_value); } } // Anonymous namespace @@ -159,7 +159,7 @@ void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Regis void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { const Register ret{ctx.reg_alloc.Define(inst)}; - ctx.Add("RSQ {}.x,{};RCP {}.x,{}.x;", ret, value, ret, ret); + ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); } void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 2be91ccfd..15fd23356 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -87,20 +87,38 @@ void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, ScalarS32 offset, ScalarS32 count) { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset); - ctx.Add("BFI.S {},RC,{},{};", inst, insert, base); + const Register ret{ctx.reg_alloc.Define(inst)}; + if
+    if (count.type != Type::Register && offset.type != Type::Register) {
+        ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base);
+    } else {
+        ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};"
+                "BFI.S {},RC,{},{};",
+                count, offset, ret, insert, base);
+    }
 }

 void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset,
                           ScalarS32 count) {
-    ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset);
-    ctx.Add("BFE.S {},RC,{};", inst, base);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    if (count.type != Type::Register && offset.type != Type::Register) {
+        ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base);
+    } else {
+        ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};"
+                "BFE.S {},RC,{};",
+                count, offset, ret, base);
+    }
 }

 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
-    ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};", count, offset);
-    ctx.Add("BFE.U {},RC,{};", inst, base);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    if (count.type != Type::Register && offset.type != Type::Register) {
+        ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
+    } else {
+        ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};"
+                "BFE.U {},RC,{};",
+                count, offset, ret, base);
+    }
 }

 void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
@@ -141,16 +159,16 @@ void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {

 void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min,
                   ScalarS32 max) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    ctx.Add("MIN.S {}.x,{},{};"
-            "MAX.S {}.x,{},{};",
-            ret, max, value, ret, ret, min);
+    ctx.Add("MIN.S RC.x,{},{};"
+            "MAX.S {}.x,RC.x,{};",
+            max, value, ret, min);
 }

 void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min,
                   ScalarU32 max) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    ctx.Add("MIN.U {}.x,{},{};"
-            "MAX.U {}.x,{},{};",
-            ret, max, value, ret, ret, min);
+    ctx.Add("MIN.U RC.x,{},{};"
+            "MAX.U {}.x,RC.x,{};",
+            max, value, ret, min);
 }

 void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) {

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
index cfde86047..b9e5cbbbe 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp
@@ -43,7 +43,7 @@ void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register tr
                 cond, ret, true_value);
     } else {
         ctx.Add("MOV.S.CC RC.x,{};"
-                "MOV.U64 {}.x(EQ.x),{};"
+                "MOV.U64 {}.x,{};"
                 "MOV.U64 {}.x(NE.x),{};",
                 cond, ret, false_value, ret, true_value);
     }

From c917290497b313abe2f9ad6983050703615b1888 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 10 May 2021 19:20:44 -0300
Subject: [PATCH 372/770] glasm: Enable unintentionally disabled register
 aliasing on GLASM

---
 .../backend/glasm/emit_glasm.cpp | 27 ++++++++-----
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 8b42cbf79..c90b80e48 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -29,9 +29,9 @@ struct FuncTraits {

 template <typename T>
 struct Identity {
-    Identity(const T& data_) : data{data_} {}
+    Identity(T data_) : data{data_} {}

-    const T& Extract() {
+    T Extract() {
         return data;
     }

@@ -71,15 +71,12 @@ public:
         }
     }

-    ~RegWrapper() {
+    auto Extract() {
         if (inst) {
             reg_alloc.Unref(*inst);
         } else {
             reg_alloc.FreeReg(reg);
         }
-    }
-
-    auto Extract() {
         return std::conditional_t{Value{reg}};
     }

@@ -95,13 +92,10 @@ public:
     ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
         : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}

-    ~ValueWrapper() {
+    ArgType Extract() {
         if (!ir_value.IsImmediate()) {
             reg_alloc.Unref(*ir_value.InstRecursive());
         }
-    }
-
-    ArgType Extract() {
         return value;
     }

@@ -120,7 +114,7 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) {
     } else if constexpr (std::is_base_of_v) {
         return ValueWrapper{ctx, arg};
     } else if constexpr (std::is_same_v) {
-        return Identity{arg};
+        return Identity<const IR::Value&>{arg};
     } else if constexpr (std::is_same_v) {
         return Identity{arg.U32()};
     } else if constexpr (std::is_same_v) {
@@ -137,9 +131,9 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) {

 template <auto func, bool is_first_arg_inst, typename... Args>
 void InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
     if constexpr (is_first_arg_inst) {
-        func(ctx, *inst, std::forward<Args>(args.Extract())...);
+        func(ctx, *inst, args.Extract()...);
     } else {
-        func(ctx, std::forward<Args>(args.Extract())...);
+        func(ctx, args.Extract()...);
     }
 }

@@ -147,10 +141,11 @@ template <auto func, bool is_first_arg_inst, size_t... I>
 void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
     using Traits = FuncTraits<decltype(func)>;
     if constexpr (is_first_arg_inst) {
-        func(ctx, *inst,
-             Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I)).Extract()...);
+        InvokeCall<func, is_first_arg_inst>(
+            ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
     } else {
-        func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I)).Extract()...);
+        InvokeCall<func, is_first_arg_inst>(
+            ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
     }
 }

From 8c81a20ace8c65d0a9d58b9cf333049a2bc0383a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Mon, 10 May 2021 19:32:10 -0300
Subject: [PATCH 373/770] glasm: Ensure reg alloc order across compilers on
 GLASM

Use a struct constructor to serialize register allocation arguments to
ensure registers are allocated in the same order regardless of the
compiler used.

The A and B functions can be called in any order when passed as
arguments to "foo":

foo(A(), B())

But the order is guaranteed for curly-braced constructor calls in
classes:

Foo{A(), B()}

Use this to get consistent behavior.
---
 .../backend/glasm/emit_glasm.cpp | 25 +++++++++------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index c90b80e48..047b2f89c 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -128,24 +128,27 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) {
     }
 }

-template <auto func, bool is_first_arg_inst, typename... Args>
-void InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
-    if constexpr (is_first_arg_inst) {
-        func(ctx, *inst, args.Extract()...);
-    } else {
-        func(ctx, args.Extract()...);
+template <auto func, bool is_first_arg_inst>
+struct InvokeCall {
+    template <typename... Args>
+    InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
+        if constexpr (is_first_arg_inst) {
+            func(ctx, *inst, args.Extract()...);
+        } else {
+            func(ctx, args.Extract()...);
+        }
     }
-}
+};

 template <auto func, bool is_first_arg_inst, size_t... I>
 void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
     using Traits = FuncTraits<decltype(func)>;
     if constexpr (is_first_arg_inst) {
-        InvokeCall<func, is_first_arg_inst>(
-            ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+        InvokeCall<func, is_first_arg_inst>{
+            ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
     } else {
-        InvokeCall<func, is_first_arg_inst>(
-            ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
+        InvokeCall<func, is_first_arg_inst>{
+            ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
     }
 }

From 7ff5851608031baca2adceb9f72e7c75eda9b3a9 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 10 May 2021 22:35:16 -0400
Subject: [PATCH 374/770] glasm: Implement Storage atomics

StorageAtomicExchangeU64 is failing its test, seemingly due to a
failure storing the 64-bit result into the register.
---
 .../backend/glasm/emit_glasm.cpp              |  7 ++
 .../backend/glasm/emit_glasm_atomic.cpp       | 60 ++++++++++++
 .../backend/glasm/emit_glasm_instructions.h   | 38 +++++---
 .../glasm/emit_glasm_not_implemented.cpp      | 96 -------------------
 .../glasm/emit_glasm_shared_memory.cpp        | 64 +++++++++++++
 5 files changed, 156 insertions(+), 109 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 047b2f89c..056d8cbf8 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -193,6 +193,9 @@ void SetupOptions(std::string& header, Info info) {
     if (info.uses_subgroup_shuffles) {
         header += "OPTION NV_shader_thread_shuffle;";
     }
+    // TODO: Track the shared atomic ops
+    header +=
+        "OPTION NV_shader_storage_buffer;OPTION NV_gpu_program_fp64;OPTION NV_bindless_texture;";
 }
 } // Anonymous namespace

@@ -214,6 +217,10 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) {
     default:
         break;
     }
+    if (program.shared_memory_size > 0) {
+        header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
+        header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
+    }
     header += "TEMP ";
     for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
         header += fmt::format("R{},", index);

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
index fe44c3d15..e72b252a3 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
@@ -35,6 +35,66 @@ void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32
 }
 } // namespace

+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];",
inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarU32 value) { Atom(ctx, inst, binding, offset, value, "ADD", "U32"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 75613571f..8202354fe 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -129,7 +129,7 @@ void EmitLoadSharedS8(EmitContext& ctx, ScalarU32 offset); void EmitLoadSharedU16(EmitContext& ctx, ScalarU32 offset); void EmitLoadSharedS16(EmitContext& ctx, ScalarU32 offset); void EmitLoadSharedU32(EmitContext& ctx, ScalarU32 offset); -void EmitLoadSharedU64(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); void EmitLoadSharedU128(EmitContext& ctx, ScalarU32 offset); void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); @@ -345,18 +345,30 @@ void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); -void EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicSMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value); -void EmitSharedAtomicUMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicSMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value); -void EmitSharedAtomicUMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicInc32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicDec32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicAnd32(EmitContext& 
ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicOr32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicXor32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicExchange32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value); -void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Register value); +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value); +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value); +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value); void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarU32 value); void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 3c0a74e3c..b40d09f8c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -199,54 +199,6 @@ void EmitUndefU64(EmitContext& ctx) { NotImplemented(); } -void EmitLoadSharedU8(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedS8(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedU16(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedS16(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedU32(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedU64(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitLoadSharedU128(EmitContext& ctx, ScalarU32 offset) { - NotImplemented(); -} - -void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { - NotImplemented(); -} - -void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } @@ -271,54 
+223,6 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitSharedAtomicIAdd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicSMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value) { - NotImplemented(); -} - -void EmitSharedAtomicUMin32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicSMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarS32 value) { - NotImplemented(); -} - -void EmitSharedAtomicUMax32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicInc32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicDec32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicAnd32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicOr32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicXor32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicExchange32(EmitContext& ctx, ScalarU32 pointer_offset, ScalarU32 value) { - NotImplemented(); -} - -void EmitSharedAtomicExchange64(EmitContext& ctx, ScalarU32 pointer_offset, Register value) { - NotImplemented(); -} - void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("OR.S {},{},{};", inst, a, b); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp index e69de29bb..32cc5d92c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp @@ -0,0 +1,64 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
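One note on mechanics before the new file: every GLASM emitter in these patches builds program text through EmitContext::Add (and LongAdd for 64-bit results), a thin fmt wrapper that formats one instruction and appends it to the output string. A minimal stand-alone sketch of that mechanism, assuming nothing beyond fmt itself; Add here is a hypothetical free function standing in for the real member, and the register names are made up:

#include <string>
#include <utility>

#include <fmt/format.h>

std::string code; // accumulated GLASM program text

// Hypothetical stand-in for EmitContext::Add: format one instruction, append it
template <typename... Args>
void Add(fmt::format_string<Args...> format, Args&&... args) {
    code += fmt::format(format, std::forward<Args>(args)...);
    code += '\n';
}

int main() {
    // Mirrors the shared atomic emitters above: destination, value, shared-memory offset
    Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", "R0.x", "R1.x", "R2.x");
    fmt::print("{}", code);
}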
+
+#include "shader_recompiler/backend/glasm/emit_context.h"
+#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::Backend::GLASM {
+
+void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) {
+    ctx.LongAdd("LDS.U64 {},shared_mem[{}];", inst, offset);
+}
+
+void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+                       [[maybe_unused]] ScalarU32 value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+                        [[maybe_unused]] ScalarU32 value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) {
+    ctx.Add("STS.U32 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) {
+    ctx.Add("STS.U64 {},shared_mem[{}];", value, offset);
+}
+
+void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
+                         [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+} // namespace Shader::Backend::GLASM

From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Fri, 14 May 2021 00:40:54 -0300
Subject: [PATCH 375/770] glasm: Rework control flow introducing a syntax list

This commit regresses VertexA shaders; their transformation pass has to
be adapted to the new control flow.
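For intuition: after this change the frontend hands every backend a single linear list of structured nodes (block, if/endif, loop/repeat, break, return) instead of per-block branch targets, so code emission becomes a flat walk over that list. A minimal, self-contained sketch of such a walk follows, with simplified stand-ins for IR::AbstractSyntaxNode and IR::Block; this is illustrative only, the real definitions are added below in frontend/ir/abstract_syntax_list.h, and the emitted strings are placeholders rather than real GLASM or SPIR-V:

#include <cstdio>
#include <vector>

struct Block {}; // stand-in for IR::Block

struct AstNode { // stand-in for IR::AbstractSyntaxNode
    enum class Type { Block, If, EndIf, Loop, Repeat, Break, Return, Unreachable };
    Type type{};
    Block* block{}; // only meaningful when type == Type::Block
};

// Walk the list in order; structured markers always come in matched pairs
// (If/EndIf, Loop/Repeat), so no label or branch-target bookkeeping is needed.
void WalkAst(const std::vector<AstNode>& syntax_list) {
    for (const AstNode& node : syntax_list) {
        switch (node.type) {
        case AstNode::Type::Block:
            std::puts("; emit the instructions of node.block here");
            break;
        case AstNode::Type::If:
            std::puts("; begin conditional");
            break;
        case AstNode::Type::EndIf:
            std::puts("; end conditional");
            break;
        case AstNode::Type::Loop:
            std::puts("; begin loop");
            break;
        case AstNode::Type::Repeat:
            std::puts("; conditional loop back-edge");
            break;
        case AstNode::Type::Break:
            std::puts("; conditional break");
            break;
        case AstNode::Type::Return:
        case AstNode::Type::Unreachable:
            std::puts("; return");
            break;
        }
    }
}

int main() {
    // Shape of "block0; if (cond) { block1 } block2; return":
    Block b0, b1, b2;
    WalkAst({{AstNode::Type::Block, &b0},
             {AstNode::Type::If},
             {AstNode::Type::Block, &b1},
             {AstNode::Type::EndIf},
             {AstNode::Type::Block, &b2},
             {AstNode::Type::Return}});
}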
--- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_glasm.cpp | 41 ++- .../backend/glasm/emit_glasm_instructions.h | 7 +- .../backend/glasm/emit_glasm_integer.cpp | 9 +- .../glasm/emit_glasm_not_implemented.cpp | 24 +- .../backend/spirv/emit_context.cpp | 9 - .../backend/spirv/emit_context.h | 1 - .../backend/spirv/emit_spirv.cpp | 71 +++++- .../backend/spirv/emit_spirv_control_flow.cpp | 33 +-- .../backend/spirv/emit_spirv_instructions.h | 9 +- .../frontend/ir/abstract_syntax_list.h | 56 +++++ .../frontend/ir/basic_block.cpp | 56 +---- .../frontend/ir/basic_block.h | 51 +--- .../frontend/ir/ir_emitter.cpp | 60 ++--- .../frontend/ir/ir_emitter.h | 11 +- .../frontend/ir/microinstruction.cpp | 11 +- src/shader_recompiler/frontend/ir/opcodes.h | 1 - src/shader_recompiler/frontend/ir/opcodes.inc | 21 +- .../frontend/ir/post_order.cpp | 36 ++- .../frontend/ir/post_order.h | 3 +- src/shader_recompiler/frontend/ir/program.h | 4 +- src/shader_recompiler/frontend/ir/type.h | 49 ++-- src/shader_recompiler/frontend/ir/value.cpp | 8 - src/shader_recompiler/frontend/ir/value.h | 9 - .../frontend/maxwell/program.cpp | 24 +- .../maxwell/structured_control_flow.cpp | 235 ++++++++++-------- .../maxwell/structured_control_flow.h | 12 +- .../frontend/maxwell/translate/translate.cpp | 7 +- .../frontend/maxwell/translate/translate.h | 2 +- .../ir_opt/constant_propagation_pass.cpp | 20 -- .../ir_opt/dual_vertex_pass.cpp | 56 +---- .../ir_opt/identity_removal_pass.cpp | 1 - .../ir_opt/ssa_rewrite_pass.cpp | 4 +- 33 files changed, 437 insertions(+), 505 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/abstract_syntax_list.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f829b8d32..0d55924a7 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -51,6 +51,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_warp.cpp environment.h exception.h + frontend/ir/abstract_syntax_list.h frontend/ir/attribute.cpp frontend/ir/attribute.h frontend/ir/basic_block.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 056d8cbf8..51ca83d18 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -117,8 +117,6 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) { return Identity{arg}; } else if constexpr (std::is_same_v) { return Identity{arg.U32()}; - } else if constexpr (std::is_same_v) { - return Identity{arg.Label()}; } else if constexpr (std::is_same_v) { return Identity{arg.Attribute()}; } else if constexpr (std::is_same_v) { @@ -177,6 +175,39 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } +void EmitCode(EmitContext& ctx, const IR::Program& program) { + const auto eval{ + [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("MOV.S.CC RC,{};IF NE.x;", eval(node.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("ENDIF;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("REP;"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + 
ctx.Add("MOV.S.CC RC,{};BRK NE.x;ENDREP;", eval(node.repeat.cond)); + break; + case IR::AbstractSyntaxNode::Type::Break: + ctx.Add("MOV.S.CC RC,{};BRK NE.x;", eval(node.repeat.cond)); + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("RET;"); + break; + } + } +} + void SetupOptions(std::string& header, Info info) { if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; @@ -201,11 +232,7 @@ void SetupOptions(std::string& header, Info info) { std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { EmitContext ctx{program}; - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } - } + EmitCode(ctx, program); std::string header = "!!NVcp5.0\n" "OPTION NV_internal;"; SetupOptions(header, program.info); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 8202354fe..0f7f16e6e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -22,13 +22,8 @@ class EmitContext; void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBranch(EmitContext& ctx); -void EmitBranchConditional(EmitContext& ctx); -void EmitLoopMerge(EmitContext& ctx); -void EmitSelectionMerge(EmitContext& ctx); -void EmitReturn(EmitContext& ctx); +void EmitBranchConditionRef(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 15fd23356..adcc0404b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -91,7 +91,8 @@ void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scalar if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); } else { - ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" "BFI.S {},RC,{},{};", count, offset, ret, insert, base); } @@ -103,7 +104,8 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); } else { - ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" "BFE.S {},RC,{};", count, offset, ret, base); } @@ -115,7 +117,8 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); } else { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.U RC.x,{};" + "MOV.U RC.y,{};" "BFE.U {},RC,{};", count, offset, ret, base); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index b40d09f8c..f37ad5587 100644 --- 
a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -23,34 +23,12 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst) { void EmitVoid(EmitContext&) {} -void EmitBranch(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBranchConditional(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoopMerge(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSelectionMerge(EmitContext& ctx) { - NotImplemented(); -} - -void EmitReturn(EmitContext& ctx) { - ctx.Add("RET;"); -} +void EmitBranchConditionRef(EmitContext&) {} void EmitJoin(EmitContext& ctx) { NotImplemented(); } -void EmitUnreachable(EmitContext& ctx) { - NotImplemented(); -} - void EmitDemoteToHelperInvocation(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 9759591bd..a98e08392 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -463,7 +463,6 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings DefineImages(program.info, image_binding); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); - DefineLabels(program); } EmitContext::~EmitContext() = default; @@ -487,8 +486,6 @@ Id EmitContext::Def(const IR::Value& value) { return Const(value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); - case IR::Type::Label: - return value.Label()->Definition(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -1139,12 +1136,6 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineLabels(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - block->SetDefinition(OpLabel()); - } -} - void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8b000f1ec..d2b79f6c1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -296,7 +296,6 @@ private: void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2dad87e87..c22edfec2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -41,8 +41,6 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg; } else if constexpr (std::is_same_v) { return arg.U32(); - } else if constexpr (std::is_same_v) { - return arg.Label(); } else if constexpr (std::is_same_v) { return arg.Attribute(); } else if constexpr (std::is_same_v) { @@ -109,15 +107,74 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { } } +void Traverse(EmitContext& ctx, IR::Program& program) { + IR::Block* current_block{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + const Id label{node.block->Definition()}; 
+ if (current_block) { + ctx.OpBranch(label); + } + current_block = node.block; + ctx.AddLabel(label); + for (IR::Inst& inst : node.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: { + const Id if_label{node.if_node.body->Definition()}; + const Id endif_label{node.if_node.merge->Definition()}; + ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.Def(node.if_node.cond), if_label, endif_label); + break; + } + case IR::AbstractSyntaxNode::Type::Loop: { + const Id body_label{node.loop.body->Definition()}; + const Id continue_label{node.loop.continue_block->Definition()}; + const Id endloop_label{node.loop.merge->Definition()}; + + ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + ctx.OpBranch(node.loop.body->Definition()); + break; + } + case IR::AbstractSyntaxNode::Type::Break: { + const Id break_label{node.break_node.merge->Definition()}; + const Id skip_label{node.break_node.skip->Definition()}; + ctx.OpBranchConditional(ctx.Def(node.break_node.cond), break_label, skip_label); + break; + } + case IR::AbstractSyntaxNode::Type::EndIf: + if (current_block) { + ctx.OpBranch(node.end_if.merge->Definition()); + } + break; + case IR::AbstractSyntaxNode::Type::Repeat: { + const Id loop_header_label{node.repeat.loop_header->Definition()}; + const Id merge_label{node.repeat.merge->Definition()}; + ctx.OpBranchConditional(ctx.Def(node.repeat.cond), loop_header_label, merge_label); + break; + } + case IR::AbstractSyntaxNode::Type::Return: + ctx.OpReturn(); + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.OpUnreachable(); + break; + } + if (node.type != IR::AbstractSyntaxNode::Type::Block) { + current_block = nullptr; + } + } +} + Id DefineMain(EmitContext& ctx, IR::Program& program) { const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { - ctx.AddLabel(block->Definition()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } + block->SetDefinition(ctx.OpLabel()); } + Traverse(ctx, program); ctx.OpFunctionEnd(); return main; } @@ -411,6 +468,8 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return id; } +void EmitBranchConditionRef(EmitContext&) {} + void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6154c46be..d33486f28 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -7,40 +7,21 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, Id label) { - ctx.OpBranch(label); -} - -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { - ctx.OpBranchConditional(condition, true_label, false_label); -} - -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { - ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); -} - -void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { - ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); -} - -void EmitReturn(EmitContext& ctx) { - ctx.OpReturn(); -} - void EmitJoin(EmitContext&) { throw NotImplementedException("Join shouldn't be emitted"); } 
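For reference, the Traverse function above leans on SPIR-V's structured control-flow rules: OpSelectionMerge must be the second-to-last instruction of its block, immediately followed by the conditional branch, and every block ends in exactly one terminator. A distilled, runnable sketch of the If/EndIf emission pattern; MockCtx is a hypothetical stand-in for the sirit-backed EmitContext (the method names mirror the calls used in Traverse, the bodies merely print the instruction shapes):

#include <cstdio>

struct Id { int value; };

struct MockCtx { // hypothetical stand-in for Shader::Backend::SPIRV::EmitContext
    int next = 1;
    Id OpLabel() { return Id{next++}; } // reserve an id now; emitted later by AddLabel
    void AddLabel(Id l) { std::printf("%%%d = OpLabel\n", l.value); }
    void OpSelectionMerge(Id merge) { std::printf("OpSelectionMerge %%%d\n", merge.value); }
    void OpBranchConditional(Id cond, Id t, Id f) {
        std::printf("OpBranchConditional %%%d %%%d %%%d\n", cond.value, t.value, f.value);
    }
    void OpBranch(Id l) { std::printf("OpBranch %%%d\n", l.value); }
};

int main() {
    MockCtx ctx;
    const Id cond{99}; // assume the condition was defined earlier
    const Id body{ctx.OpLabel()};
    const Id merge{ctx.OpLabel()};
    ctx.OpSelectionMerge(merge);                // declare the merge point first...
    ctx.OpBranchConditional(cond, body, merge); // ...then branch, ending the block
    ctx.AddLabel(body);
    // ...emit the body here, then close its block by branching to the merge point
    ctx.OpBranch(merge);
    ctx.AddLabel(merge);
}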
-void EmitUnreachable(EmitContext& ctx) { - ctx.OpUnreachable(); -} - -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { +void EmitDemoteToHelperInvocation(EmitContext& ctx) { if (ctx.profile.support_demote_to_helper_invocation) { ctx.OpDemoteToHelperInvocationEXT(); - ctx.OpBranch(continue_label); } else { + const Id kill_label{ctx.OpLabel()}; + const Id impossible_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); + ctx.AddLabel(kill_label); ctx.OpKill(); + ctx.AddLabel(impossible_label); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index a1ca3f43d..2f4f6e59e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -23,14 +23,9 @@ class EmitContext; Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); +void EmitBranchConditionRef(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..1366414c2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+
+#include <vector>
+
+#include "shader_recompiler/frontend/ir/value.h"
+
+namespace Shader::IR {
+
+class Block;
+
+struct AbstractSyntaxNode {
+    enum class Type {
+        Block,
+        If,
+        EndIf,
+        Loop,
+        Repeat,
+        Break,
+        Return,
+        Unreachable,
+    };
+    Type type{};
+    union {
+        Block* block{};
+        struct {
+            U1 cond;
+            Block* body;
+            Block* merge;
+        } if_node;
+        struct {
+            Block* merge;
+        } end_if;
+        struct {
+            Block* body;
+            Block* continue_block;
+            Block* merge;
+        } loop;
+        struct {
+            U1 cond;
+            Block* loop_header;
+            Block* merge;
+        } repeat;
+        struct {
+            U1 cond;
+            Block* merge;
+            Block* skip;
+        } break_node;
+    };
+};
+using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
+
+} // namespace Shader::IR

diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index f92fc2571..7c08b25ce 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -14,10 +14,7 @@

 namespace Shader::IR {

-Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end)
-    : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {}
-
-Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {}
+Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}

 Block::~Block() = default;

@@ -40,39 +37,15 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
     return result_it;
 }

-void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) {
-    branch_cond = cond;
-    branch_true = branch_true_;
-    branch_false = branch_false_;
-}
-
-void Block::SetBranch(Block* branch) {
-    branch_cond = Condition{true};
-    branch_true = branch;
-}
-
-void Block::SetReturn() {
-    branch_cond = Condition{true};
-    branch_true = nullptr;
-    branch_false = nullptr;
-}
-
-bool Block::IsVirtual() const noexcept {
-    return location_begin == location_end;
-}
-
-u32 Block::LocationBegin() const noexcept {
-    return location_begin;
-}
-
-u32 Block::LocationEnd() const noexcept {
-    return location_end;
-}
-
-void Block::AddImmediatePredecessor(Block* block) {
-    if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) {
-        imm_predecessors.push_back(block);
+void Block::AddBranch(Block* block) {
+    if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
+        throw LogicError("Successor already inserted");
     }
+    if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
+        throw LogicError("Predecessor already inserted");
+    }
+    imm_successors.push_back(block);
+    block->imm_predecessors.push_back(this);
 }

 static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
@@ -92,15 +65,11 @@ static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& in
     return it->second;
 }

-static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index,
-                              std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
+static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                               const Value& arg) {
     if (arg.IsEmpty()) {
         return "<null>";
     }
-    if (arg.IsLabel()) {
-        return BlockToIndex(block_to_index, arg.Label());
-    }
     if (!arg.IsImmediate() || arg.IsIdentity()) {
         return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
     }
@@ -140,8 +109,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
     if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
         ret += fmt::format(" ${}", it->second);
     }
-    ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd());
-
+    ret += '\n';
     for (const Inst& inst : block) {
const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); @@ -153,7 +121,7 @@ std::string DumpBlock(const Block& block, const std::map& const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_str, diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 0b0c97af6..7e134b4c7 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -12,6 +12,7 @@ #include #include "common/bit_cast.h" +#include "common/common_types.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" @@ -27,7 +28,6 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); explicit Block(ObjectPool& inst_pool_); ~Block(); @@ -44,22 +44,8 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u32 flags = 0); - /// Set the branches to jump to when all instructions have executed. - void SetBranches(Condition cond, Block* branch_true, Block* branch_false); - /// Set the branch to unconditionally jump to when all instructions have executed. - void SetBranch(Block* branch); - /// Mark the block as a return block. - void SetReturn(); - - /// Returns true when the block does not implement any guest instructions directly. - [[nodiscard]] bool IsVirtual() const noexcept; - /// Gets the starting location of this basic block. - [[nodiscard]] u32 LocationBegin() const noexcept; - /// Gets the end location for this basic block. - [[nodiscard]] u32 LocationEnd() const noexcept; - - /// Adds a new immediate predecessor to this basic block. - void AddImmediatePredecessor(Block* block); + /// Adds a new branch to this basic block. + void AddBranch(Block* block); /// Gets a mutable reference to the instruction list for this basic block. [[nodiscard]] InstructionList& Instructions() noexcept { @@ -71,9 +57,13 @@ public: } /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span ImmediatePredecessors() const noexcept { + [[nodiscard]] std::span ImmPredecessors() const noexcept { return imm_predecessors; } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span ImmSuccessors() const noexcept { + return imm_successors; + } /// Intrusively store the host definition of this instruction. 
template @@ -87,19 +77,6 @@ public: return Common::BitCast(definition); } - [[nodiscard]] Condition BranchCondition() const noexcept { - return branch_cond; - } - [[nodiscard]] bool IsTerminationBlock() const noexcept { - return !branch_true && !branch_false; - } - [[nodiscard]] Block* TrueBranch() const noexcept { - return branch_true; - } - [[nodiscard]] Block* FalseBranch() const noexcept { - return branch_false; - } - void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { ssa_reg_values[RegIndex(reg)] = value; } @@ -178,22 +155,14 @@ public: private: /// Memory pool for instruction list ObjectPool* inst_pool; - /// Starting location of this block - u32 location_begin; - /// End location of this block - u32 location_end; /// List of instructions in this block InstructionList instructions; - /// Condition to choose the branch to take - Condition branch_cond{true}; - /// Block to jump into when the branch condition evaluates as true - Block* branch_true{nullptr}; - /// Block to jump into when the branch condition evaluates as false - Block* branch_false{nullptr}; /// Block immediate predecessors std::vector imm_predecessors; + /// Block immediate successors + std::vector imm_successors; /// Intrusively store the value of a register in the block. std::array ssa_reg_values; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce6c9af07..eb45aa477 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,25 +61,28 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(Block* label) { - label->AddImmediatePredecessor(block); - block->SetBranch(label); - Inst(Opcode::Branch, label); +void IREmitter::Prologue() { + Inst(Opcode::Prologue); } -void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { - block->SetBranches(IR::Condition{true}, true_label, false_label); - true_label->AddImmediatePredecessor(block); - false_label->AddImmediatePredecessor(block); - Inst(Opcode::BranchConditional, condition, true_label, false_label); +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); } -void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { - Inst(Opcode::LoopMerge, merge_block, continue_target); +void IREmitter::BranchConditionRef(const U1& cond) { + Inst(Opcode::BranchConditionRef, cond); } -void IREmitter::SelectionMerge(Block* merge_block) { - Inst(Opcode::SelectionMerge, merge_block); +void IREmitter::DemoteToHelperInvocation() { + Inst(Opcode::DemoteToHelperInvocation); +} + +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); } void IREmitter::Barrier() { @@ -94,37 +97,6 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } -void IREmitter::Return() { - block->SetReturn(); - Inst(Opcode::Return); -} - -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); -} - -void IREmitter::DemoteToHelperInvocation(Block* continue_label) { - block->SetBranch(continue_label); - continue_label->AddImmediatePredecessor(block); - Inst(Opcode::DemoteToHelperInvocation, continue_label); -} - -void IREmitter::Prologue() { - Inst(Opcode::Prologue); -} - -void IREmitter::Epilogue() { - Inst(Opcode::Epilogue); -} - -void IREmitter::EmitVertex(const U32& stream) { - Inst(Opcode::EmitVertex, stream); -} - -void 
IREmitter::EndPrimitive(const U32& stream) { - Inst(Opcode::EndPrimitive, stream); -} - U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index fd41b7e89..7a83c33d3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,17 +32,10 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(Block* label); - void BranchConditional(const U1& condition, Block* true_label, Block* false_label); - void LoopMerge(Block* merge_block, Block* continue_target); - void SelectionMerge(Block* merge_block); - void Return(); - void Unreachable(); - void DemoteToHelperInvocation(Block* continue_label); - void Prologue(); void Epilogue(); - + void BranchConditionRef(const U1& cond); + void DemoteToHelperInvocation(); void EmitVertex(const U32& stream); void EndPrimitive(const U32& stream); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 616ef17d4..364574240 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,19 +56,14 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { - case Opcode::Branch: - case Opcode::BranchConditional: - case Opcode::LoopMerge: - case Opcode::SelectionMerge: - case Opcode::Return: + case Opcode::Prologue: + case Opcode::Epilogue: + case Opcode::BranchConditionRef: case Opcode::Join: - case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: - case Opcode::Prologue: - case Opcode::Epilogue: case Opcode::EmitVertex: case Opcode::EndPrimitive: case Opcode::SetAttribute: diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 2b9c0ed8c..56b001902 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -30,7 +30,6 @@ struct OpcodeMeta { // using enum Type; constexpr Type Void{Type::Void}; constexpr Type Opaque{Type::Opaque}; -constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9165421f8..75ddb6b6f 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -7,27 +7,20 @@ OPCODE(Phi, Opaque, OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) -// Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) +OPCODE(BranchConditionRef, Void, U1, ) OPCODE(Join, Void, ) -OPCODE(Unreachable, Void, ) -OPCODE(DemoteToHelperInvocation, Void, Label, ) +OPCODE(DemoteToHelperInvocation, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) // Barriers OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) -// Special operations -OPCODE(Prologue, Void, ) -OPCODE(Epilogue, Void, ) -OPCODE(EmitVertex, Void, U32, ) 
-OPCODE(EndPrimitive, Void, U32, ) - // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index 8709a2ea1..1a28df7fb 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include #include @@ -10,35 +12,31 @@ namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks) { +BlockList PostOrder(const AbstractSyntaxNode& root) { boost::container::small_vector block_stack; boost::container::flat_set visited; - BlockList post_order_blocks; - post_order_blocks.reserve(blocks.size()); - Block* const first_block{blocks.front()}; + if (root.type != AbstractSyntaxNode::Type::Block) { + throw LogicError("First node in abstract syntax list root is not a block"); + } + Block* const first_block{root.block}; visited.insert(first_block); block_stack.push_back(first_block); - const auto visit_branch = [&](Block* block, Block* branch) { - if (!branch) { - return false; - } - if (!visited.insert(branch).second) { - return false; - } - // Calling push_back twice is faster than insert on MSVC - block_stack.push_back(block); - block_stack.push_back(branch); - return true; - }; while (!block_stack.empty()) { Block* const block{block_stack.back()}; + const auto visit{[&](Block* branch) { + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on MSVC + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }}; block_stack.pop_back(); - - if (!visit_branch(block, block->TrueBranch()) && - !visit_branch(block, block->FalseBranch())) { + if (std::ranges::none_of(block->ImmSuccessors(), visit)) { post_order_blocks.push_back(block); } } diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 30137ff57..58a0467a0 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -5,9 +5,10 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks); +BlockList PostOrder(const AbstractSyntaxNode& root); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 51e1a8c77..9ede5b48d 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -7,8 +7,7 @@ #include #include -#include - +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" @@ -17,6 +16,7 @@ namespace Shader::IR { struct Program { + AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; Info info; diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 8b3b33852..294b230c4 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -16,31 +16,30 @@ namespace Shader::IR { enum class Type { Void = 0, Opaque = 1 << 0, - Label = 1 << 1, - Reg = 1 << 2, - Pred = 1 << 3, - Attribute = 1 << 4, - Patch = 1 << 5, - 
U1 = 1 << 6, - U8 = 1 << 7, - U16 = 1 << 8, - U32 = 1 << 9, - U64 = 1 << 10, - F16 = 1 << 11, - F32 = 1 << 12, - F64 = 1 << 13, - U32x2 = 1 << 14, - U32x3 = 1 << 15, - U32x4 = 1 << 16, - F16x2 = 1 << 17, - F16x3 = 1 << 18, - F16x4 = 1 << 19, - F32x2 = 1 << 20, - F32x3 = 1 << 21, - F32x4 = 1 << 22, - F64x2 = 1 << 23, - F64x3 = 1 << 24, - F64x4 = 1 << 25, + Reg = 1 << 1, + Pred = 1 << 2, + Attribute = 1 << 3, + Patch = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index b962f170d..d365ea1bc 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -9,8 +9,6 @@ namespace Shader::IR { Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} -Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {} - Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} @@ -33,10 +31,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} -bool Value::IsLabel() const noexcept { - return type == Type::Label; -} - IR::Type Value::Type() const noexcept { if (IsPhi()) { // The type of a phi node is stored in its flags @@ -60,8 +54,6 @@ bool Value::operator==(const Value& other) const { return true; case Type::Opaque: return inst == other.inst; - case Type::Label: - return label == other.label; case Type::Reg: return reg == other.reg; case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index beaf149f3..2ce49f953 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -37,7 +37,6 @@ class Value { public: Value() noexcept = default; explicit Value(IR::Inst* value) noexcept; - explicit Value(IR::Block* value) noexcept; explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; @@ -54,11 +53,9 @@ public: [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; - [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; [[nodiscard]] IR::Inst* Inst() const; - [[nodiscard]] IR::Block* Label() const; [[nodiscard]] IR::Inst* InstRecursive() const; [[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Reg Reg() const; @@ -80,7 +77,6 @@ private: IR::Type type{}; union { IR::Inst* inst{}; - IR::Block* label; IR::Reg reg; IR::Pred pred; IR::Attribute attribute; @@ -304,11 +300,6 @@ inline IR::Inst* Value::Inst() const { return inst; } -inline IR::Block* Value::Label() const { - DEBUG_ASSERT(type == Type::Label); - return label; -} - inline IR::Inst* Value::InstRecursive() const { DEBUG_ASSERT(type == Type::Opaque); if (IsIdentity()) { diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0d3f00699..017c4b8fd 100644 --- 
a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "shader_recompiler/frontend/ir/basic_block.h" @@ -15,6 +16,16 @@ namespace Shader::Maxwell { namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.block; }); + return blocks; +} + void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch @@ -23,7 +34,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { } const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); } @@ -110,8 +121,9 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - program.blocks = VisitAST(inst_pool, block_pool, env, cfg); - program.post_order_blocks = PostOrder(program.blocks); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); program.local_memory_size = env.LocalMemorySize(); switch (program.stage) { @@ -159,9 +171,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); std::swap(result.blocks, vertex_a.blocks); - for (IR::Block* block : vertex_b.blocks) { - result.blocks.push_back(block); - } + result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); @@ -173,7 +183,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.blocks); + result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cc5410c6d..e7e2e9c82 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -36,7 +36,6 @@ using Tree = boost::intrusive::list>; using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; enum class StatementType { Code, @@ -91,7 +90,8 @@ struct 
IndirectBranchCond {}; #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement #endif struct Statement : ListBaseHook { - Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} Statement(Goto, Statement* cond_, Node label_, Statement* up_) : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} @@ -125,7 +125,7 @@ struct Statement : ListBaseHook { } union { - IR::Block* code; + const Flow::Block* block; Node label; Tree children; IR::Condition guest_cond; @@ -171,8 +171,8 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->code->LocationBegin(), stmt->code->LocationEnd(), - reinterpret_cast(stmt->code)); + stmt->block->begin, stmt->block->end, + reinterpret_cast(stmt->block)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -407,11 +407,7 @@ private: }}; root.push_front(make_reset_variable()); root.insert(ip, make_reset_variable()); - - const u32 begin_offset{block.begin.Offset()}; - const u32 end_offset{block.end.Offset()}; - IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; - root.insert(ip, *pool.Create(ir_block, &root_stmt)); + root.insert(ip, *pool.Create(&block, &root_stmt)); switch (block.end_class) { case Flow::EndClass::Branch: { @@ -620,13 +616,13 @@ private: Statement root_stmt{FunctionTag{}}; }; -IR::Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; while (forward_node != end && !HasChildren(forward_node->type)) { if (forward_node->type == StatementType::Code) { - return forward_node->code; + return &*forward_node; } ++forward_node; } @@ -654,21 +650,29 @@ class TranslatePass { public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::BlockList& block_list_) + IR::AbstractSyntaxList& syntax_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, - block_list{block_list_} { + syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*block_list.front()}; + IR::Block& first_block{*syntax_list.front().block}; IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } private: - void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.block = current_block; + }}; Tree& tree{parent.children}; - IR::Block* current_block{nullptr}; - for (auto it = tree.begin(); it != tree.end(); ++it) { Statement& stmt{*it}; switch (stmt.type) { 
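// Note on the loop that follows: ensure_block (defined just above) is the core
// of this rewrite. Instead of eagerly creating IR blocks and wiring them
// together with branch instructions, each case lazily opens a block the first
// time something needs to be emitted, registers it in syntax_list as a
// Type::Block node, and then records the structured construct itself
// (If/Loop/Break/...) as explicit syntax nodes next to it. current_block is
// reset to null wherever control can no longer fall through.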
@@ -676,124 +680,157 @@ private: // Labels can be ignored break; case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IR::IREmitter{*current_block}.Branch(stmt.code); - } - current_block = stmt.code; - Translate(env, stmt.code); - block_list.push_back(stmt.code); + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); break; } case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } case StatementType::SetIndirectBranchVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; ir.SetIndirectBranchVariable(address); break; } case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const merge_block{MergeBlock(parent, stmt)}; - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - // Implement if header block - IR::Block* const first_if_block{block_list.at(first_block_index)}; IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - ir.BranchConditional(cond, first_if_block, merge_block); + ir.BranchConditionRef(cond); + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.if_node.cond = cond; + if_node.if_node.body = then_block; + if_node.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; if (current_block) { - IR::IREmitter{*current_block}.Branch(loop_header_block); + current_block->AddBranch(loop_header_block); } - block_list.push_back(loop_header_block); + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.block = loop_header_block; - IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); 
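// BranchConditionRef here is a keep-alive: the condition is no longer consumed
// by a branch instruction (the Repeat syntax node emitted below references it
// instead, and a syntax node does not count as an IR use), so this placeholder
// use stops passes such as dead code elimination from removing the value
// before the backend reads it. A later patch in this series replaces it with
// the more general DummyReference.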
- // Implement loop header block - IR::Block* const first_loop_block{block_list.at(first_block_index)}; - IR::IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); + IR::Block* const body_block{syntax_list.at(body_block_index).block}; + loop_header_block->AddBranch(body_block); - // Implement continue block - IR::IREmitter continue_ir{*new_continue_block}; - const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.loop.body = body_block; + loop.loop.continue_block = continue_block; + loop.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.repeat.cond = cond; + repeat.repeat.loop_header = loop_header_block; + repeat.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.break_node.cond = cond; + break_node.break_node.merge = break_block; + break_node.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = skip_block; break; } case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter ir{*current_block}; - ir.Epilogue(); - ir.Return(); + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } case StatementType::Kill: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = demote_block; break; } case StatementType::Unreachable: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter{*current_block}.Unreachable(); + ensure_block(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; break; } 
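// abstract_syntax_list.h itself is not part of this excerpt; the node layout
// below is reconstructed from the usages in this function (field names match
// those usages, but the actual declaration may differ, e.g. the per-type
// payloads are likely folded into a union):

struct AbstractSyntaxNode {
    enum class Type {
        Block, If, EndIf, Loop, Repeat, Break, Return, Unreachable,
    };
    Type type{};
    IR::Block* block{}; // Type::Block
    struct {
        IR::U1 cond;
        IR::Block* body;
        IR::Block* merge;
    } if_node; // Type::If
    struct {
        IR::Block* merge;
    } end_if; // Type::EndIf
    struct {
        IR::Block* body;
        IR::Block* continue_block;
        IR::Block* merge;
    } loop; // Type::Loop
    struct {
        IR::U1 cond;
        IR::Block* loop_header;
        IR::Block* merge;
    } repeat; // Type::Repeat
    struct {
        IR::U1 cond;
        IR::Block* merge;
        IR::Block* skip;
    } break_node; // Type::Break
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;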
default: @@ -801,42 +838,42 @@ private: } } if (current_block) { - IR::IREmitter ir{*current_block}; - if (continue_block) { - ir.Branch(continue_block); + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); } else { - ir.Unreachable(); + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; } } } IR::Block* MergeBlock(Statement& parent, Statement& stmt) { - if (IR::Block* const block{TryFindForwardBlock(stmt)}) { - return block; + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); } - // Create a merge block we can visit later - IR::Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; + return block_pool.Create(inst_pool); } ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; Environment& env; - IR::BlockList& block_list; + IR::AbstractSyntaxList& syntax_list; + // TODO: Make this constexpr when std::vector is constexpr + const Flow::Block dummy_flow_block; }; } // Anonymous namespace -IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { +IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; Statement& root{goto_pass.RootStatement()}; - IR::BlockList block_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; - return block_list; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index a6be12ba2..88b083649 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -4,12 +4,8 @@ #pragma once -#include -#include - -#include - #include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -17,8 +13,8 @@ namespace Shader::Maxwell { -[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index f1230f58f..0f4e7a251 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,13 +23,12 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -void Translate(Environment& env, IR::Block* block) { - if (block->IsVirtual()) { +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if 
(location_begin == location_end) { return; } TranslatorVisitor visitor{env, *block}; - const Location pc_end{block->LocationEnd()}; - for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { + for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index e1aa2e0f4..a3edd2e46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -9,6 +9,6 @@ namespace Shader::Maxwell { -void Translate(Environment& env, IR::Block* block); +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index b1c45d13a..66f1391db 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -353,24 +353,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -void FoldBranchConditional(IR::Inst& inst) { - const IR::U1 cond{inst.Arg(0)}; - if (cond.IsImmediate()) { - // TODO: Convert to Branch - return; - } - const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { - const IR::Value true_label{inst.Arg(1)}; - const IR::Value false_label{inst.Arg(2)}; - // Remove negation on the conditional (take the parameter out of LogicalNot) and swap - // the branches - inst.SetArg(0, cond_inst->Arg(0)); - inst.SetArg(1, false_label); - inst.SetArg(2, true_label); - } -} - std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; @@ -581,8 +563,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; - case IR::Opcode::BranchConditional: - return FoldBranchConditional(inst); case IR::Opcode::CompositeExtractF32x2: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, IR::Opcode::CompositeInsertF32x2); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index f2d7db0e6..b0a9f5258 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,60 +13,16 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program& program) { - bool replaced_join{}; - bool eliminated_epilogue{}; - for (IR::Block* const block : program.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::Return: - inst.ReplaceOpcode(IR::Opcode::Join); - replaced_join = true; - break; - case IR::Opcode::Epilogue: - inst.Invalidate(); - eliminated_epilogue = true; - break; - default: - break; - } - if (replaced_join && eliminated_epilogue) { - return; - } - } - } +void VertexATransformPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } -void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if 
(inst.GetOpcode() == IR::Opcode::Prologue) { - return inst.Invalidate(); - } - } - } +void VertexBTransformPass(IR::Program&) { + throw NotImplementedException("VertexB pass"); } -void DualVertexJoinPass(IR::Program& program) { - const auto& blocks = program.blocks; - const s64 sub_size = static_cast(blocks.size()) - 1; - if (sub_size < 1) { - throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); - } - for (s64 index = 0; index < sub_size; ++index) { - IR::Block* const current_block{blocks[index]}; - IR::Block* const next_block{blocks[index + 1]}; - for (IR::Inst& inst : current_block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Join) { - IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; - ir.Branch(next_block); - inst.Invalidate(); - // Only 1 join should exist - return; - } - } - } - throw LogicError("Dual Vertex Join pass failed, no join present"); +void DualVertexJoinPass(IR::Program&) { + throw NotImplementedException("Dual vertex join pass"); } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 6afbe24f7..e9b55f835 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -12,7 +12,6 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Program& program) { std::vector to_invalidate; - for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a8064a5d0..26eb3a3ab 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -202,7 +202,7 @@ public: incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; - } else if (const std::span imm_preds{block->ImmediatePredecessors()}; + } else if (const std::span imm_preds = block->ImmPredecessors(); imm_preds.size() == 1) { // Optimize the common case of one predecessor: no phi needed stack.back().pc = Status::SetValue; @@ -257,7 +257,7 @@ public: private: template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { - for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { + for (IR::Block* const imm_pred : block->ImmPredecessors()) { phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); From 568d813eeadfc7888bad72d4f5d695c9d745cfe5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:43:19 -0300 Subject: [PATCH 376/770] vk_update_descriptor: Properly initialize payload on the update descriptor queue --- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index ce3427c9b..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +}
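// Context for the fix above: payload_cursor is otherwise only reset once per
// frame (in TickFrame(), which does payload_cursor = payload.data()), and
// every descriptor helper bump-writes through it, along the lines of the
// following sketch (illustrative, not the class's exact member):
//
//     void VKUpdateDescriptorQueue::AddSampledImage(VkSampler sampler, VkImageView image) {
//         *(payload_cursor++) = VkDescriptorImageInfo{
//             .sampler = sampler,
//             .imageView = image,
//             .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
//         };
//     }
//
// Any descriptor written before the first TickFrame() therefore went through
// an uninitialized pointer; initializing the cursor in the constructor closes
// that window.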
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; From d4385c34e3aee6718502a1c5bc814535a657dc4f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 02:09:33 -0300 Subject: [PATCH 377/770] glasm: Declare NV_shader_thread_group when needed --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 51ca83d18..fa48ba25c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -218,15 +218,16 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "OPTION NV_shader_atomic_fp16_vector;"; } - if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask) { + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) { header += "OPTION NV_shader_thread_group;"; } if (info.uses_subgroup_shuffles) { header += "OPTION NV_shader_thread_shuffle;"; } // TODO: Track the shared atomic ops - header += - "OPTION NV_shader_storage_buffer;OPTION NV_gpu_program_fp64;OPTION NV_bindless_texture;"; + header += "OPTION NV_shader_storage_buffer;" + "OPTION NV_gpu_program_fp64;" + "OPTION NV_bindless_texture;"; } } // Anonymous namespace From 0f88fb5d72401e87b2d33aab5fad7cc2a0002fc3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 02:10:03 -0300 Subject: [PATCH 378/770] glasm: Write result to scalar on integer comparison instructions --- .../backend/glasm/emit_glasm_integer.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index adcc0404b..1befeca91 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -175,43 +175,43 @@ void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 m } void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - ctx.Add("SLT.S {},{},{};", inst, lhs, rhs); + ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); } void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { - ctx.Add("SLT.U {},{},{};", inst, lhs, rhs); + ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); } void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - ctx.Add("SEQ.S {},{},{};", inst, lhs, rhs); + ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); } void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - ctx.Add("SLE.S {},{},{};", inst, lhs, rhs); + ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); } void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { - ctx.Add("SLE.U {},{},{};", inst, lhs, rhs); + ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); } void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - ctx.Add("SGT.S {},{},{};", inst, lhs, rhs); + ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); } void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { - ctx.Add("SGT.U {},{},{};", inst, lhs, rhs); + ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); } void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - 
ctx.Add("SNE.U {},{},{};", inst, lhs, rhs); + ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); } void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { - ctx.Add("SGE.S {},{},{};", inst, lhs, rhs); + ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); } void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { - ctx.Add("SGE.U {},{},{};", inst, lhs, rhs); + ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); } } // namespace Shader::Backend::GLASM From bf5e48ffe4bd48ea681f2a01c8919c97125e88df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 04:48:46 -0300 Subject: [PATCH 379/770] glasm: Initial implementation of phi nodes on GLASM --- .../backend/glasm/emit_glasm.cpp | 59 ++++++++++++++++++- .../backend/glasm/emit_glasm_instructions.h | 3 +- .../glasm/emit_glasm_not_implemented.cpp | 27 +++++++-- .../backend/spirv/emit_spirv.cpp | 6 +- .../backend/spirv/emit_spirv_instructions.h | 3 +- .../frontend/ir/ir_emitter.cpp | 18 ++++-- .../frontend/ir/ir_emitter.h | 4 +- .../frontend/ir/microinstruction.cpp | 3 +- src/shader_recompiler/frontend/ir/opcodes.inc | 3 +- src/shader_recompiler/frontend/ir/value.h | 4 ++ .../maxwell/structured_control_flow.cpp | 6 +- .../ir_opt/ssa_rewrite_pass.cpp | 6 +- 12 files changed, 117 insertions(+), 25 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index fa48ba25c..775dd9e7e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include @@ -9,6 +10,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -175,6 +177,34 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } +void Precolor(EmitContext& ctx, const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + switch (phi.Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.reg_alloc.Define(phi); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.reg_alloc.LongDefine(phi); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.PhiMove(phi, phi.Arg(i)); + } + // Add reference to the phi node on the phi predecessor to avoid overwritting it + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.DummyReference(IR::Value{&phi}); + } + } + } +} + void EmitCode(EmitContext& ctx, const IR::Program& program) { const auto eval{ [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; @@ -186,7 +216,9 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } break; case IR::AbstractSyntaxNode::Type::If: - ctx.Add("MOV.S.CC RC,{};IF NE.x;", eval(node.if_node.cond)); + ctx.Add("MOV.S.CC RC,{};" + "IF NE.x;", + eval(node.if_node.cond)); break; case IR::AbstractSyntaxNode::Type::EndIf: 
ctx.Add("ENDIF;"); @@ -195,10 +227,30 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("REP;"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("MOV.S.CC RC,{};BRK NE.x;ENDREP;", eval(node.repeat.cond)); + if (node.repeat.cond.IsImmediate()) { + if (node.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK (EQ.x);" + "ENDREP;", + eval(node.repeat.cond)); + } break; case IR::AbstractSyntaxNode::Type::Break: - ctx.Add("MOV.S.CC RC,{};BRK NE.x;", eval(node.repeat.cond)); + if (node.break_node.cond.IsImmediate()) { + if (node.break_node.cond.U1()) { + ctx.Add("BRK;"); + } + } else { + ctx.Add("MOV.S.CC RC,{};" + "BRK (NE.x);", + eval(node.break_node.cond)); + } break; case IR::AbstractSyntaxNode::Type::Return: case IR::AbstractSyntaxNode::Type::Unreachable: @@ -233,6 +285,7 @@ void SetupOptions(std::string& header, Info info) { std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { EmitContext ctx{program}; + Precolor(ctx, program); EmitCode(ctx, program); std::string header = "!!NVcp5.0\n" "OPTION NV_internal;"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 0f7f16e6e..a74e422d6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -22,7 +22,8 @@ class EmitContext; void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBranchConditionRef(EmitContext&); +void EmitDummyReference(EmitContext&); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); void EmitJoin(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index f37ad5587..969b91a81 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -17,13 +17,32 @@ namespace Shader::Backend::GLASM { #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) -void EmitPhi(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); -} +void EmitPhi(EmitContext&, IR::Inst&) {} void EmitVoid(EmitContext&) {} -void EmitBranchConditionRef(EmitContext&) {} +void EmitDummyReference(EmitContext&) {} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { + if (phi == value) { + return; + } + const Register phi_reg{ctx.reg_alloc.Consume(phi)}; + const Value eval_value{ctx.reg_alloc.Consume(value)}; + switch (phi.InstRecursive()->Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} void EmitJoin(EmitContext& ctx) { NotImplemented(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c22edfec2..7bf8c78de 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -468,7 +468,11 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return id; } -void EmitBranchConditionRef(EmitContext&) {} +void EmitDummyReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + throw LogicError("Unreachable instruction"); +} void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2f4f6e59e..0a2b31772 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -23,7 +23,8 @@ class EmitContext; Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranchConditionRef(EmitContext&); +void EmitDummyReference(EmitContext&); +void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index eb45aa477..def29143e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,6 +61,14 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } +void IREmitter::DummyReference(const Value& value) { + Inst(Opcode::DummyReference, value); +} + +void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { + Inst(Opcode::PhiMove, Value{&phi}, value); +} + void IREmitter::Prologue() { Inst(Opcode::Prologue); } @@ -69,10 +77,6 @@ void IREmitter::Epilogue() { Inst(Opcode::Epilogue); } -void IREmitter::BranchConditionRef(const U1& cond) { - Inst(Opcode::BranchConditionRef, cond); -} - void IREmitter::DemoteToHelperInvocation() { Inst(Opcode::DemoteToHelperInvocation); } @@ -106,6 +110,9 @@ void IREmitter::SetReg(IR::Reg reg, const U32& value) { } U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { + if (pred == Pred::PT) { + return Imm1(!is_negated); + } const U1 value{Inst(Opcode::GetPred, pred)}; if (is_negated) { return Inst(Opcode::LogicalNot, value); @@ -264,6 +271,9 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { U1 IREmitter::Condition(IR::Condition cond) { const FlowTest flow_test{cond.GetFlowTest()}; const auto [pred, is_negated]{cond.GetPred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7a83c33d3..4f7c820fe 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,9 +32,11 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; + void DummyReference(const Value& value); + void PhiMove(IR::Inst& phi, const Value& value); + void Prologue(); void Epilogue(); - void BranchConditionRef(const U1& cond); void DemoteToHelperInvocation(); void EmitVertex(const U32& stream); void EndPrimitive(const U32& stream); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 364574240..267aebc61 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ 
b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,9 +56,10 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { + case Opcode::DummyReference: + case Opcode::PhiMove: case Opcode::Prologue: case Opcode::Epilogue: - case Opcode::BranchConditionRef: case Opcode::Join: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 75ddb6b6f..6196b867d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -6,11 +6,12 @@ OPCODE(Phi, Opaque, ) OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) +OPCODE(DummyReference, Void, Opaque, ) +OPCODE(PhiMove, Void, Opaque, Opaque, ) // Special operations OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) -OPCODE(BranchConditionRef, Void, U1, ) OPCODE(Join, Void, ) OPCODE(DemoteToHelperInvocation, Void, ) OPCODE(EmitVertex, Void, U32, ) diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 2ce49f953..0c6bf684d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -391,4 +391,8 @@ inline f64 Value::F64() const { return imm_f64; } +[[nodiscard]] inline bool IsPhi(const Inst& inst) { + return inst.GetOpcode() == Opcode::Phi; +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e7e2e9c82..836d4b8aa 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -704,7 +704,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -755,7 +755,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); IR::Block* const body_block{syntax_list.at(body_block_index).block}; loop_header_block->AddBranch(body_block); @@ -792,7 +792,7 @@ private: IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 26eb3a3ab..e54499ba5 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -138,10 +138,6 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } -[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.GetOpcode() == IR::Opcode::Phi; -} - enum class Status { Start, SetValue, @@ -283,7 +279,7 @@ private: list.erase(IR::Block::InstructionList::s_iterator_to(phi)); // Find the first non-phi instruction and use it as an insertion point - IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IsPhi)}; + IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; if (same.IsEmpty()) { // The phi is unreachable or in the start block // Insert an 
undefined instruction and make it the phi node replacement From ab5dbe7c299d904e36099d209b7290e538a84745 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 22:01:01 -0300 Subject: [PATCH 380/770] emit_spirv: Add missing block in case --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7bf8c78de..266ac690c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -111,7 +111,7 @@ void Traverse(EmitContext& ctx, IR::Program& program) { IR::Block* current_block{}; for (const IR::AbstractSyntaxNode& node : program.syntax_list) { switch (node.type) { - case IR::AbstractSyntaxNode::Type::Block: + case IR::AbstractSyntaxNode::Type::Block: { const Id label{node.block->Definition()}; if (current_block) { ctx.OpBranch(label); @@ -122,6 +122,7 @@ void Traverse(EmitContext& ctx, IR::Program& program) { EmitInst(ctx, &inst); } break; + } case IR::AbstractSyntaxNode::Type::If: { const Id if_label{node.if_node.body->Definition()}; const Id endif_label{node.if_node.merge->Definition()}; From 057dee48562b0cce69b1fa8bdb02bc0367852b4d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 14 May 2021 21:18:53 -0400 Subject: [PATCH 381/770] glasm: Implement local memory for glasm --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +++ .../backend/glasm/emit_glasm_context_get_set.cpp | 8 ++++++++ .../backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 8 -------- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 775dd9e7e..0b70bf3f6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -306,6 +306,9 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { header += fmt::format("R{},", index); } + if (program.local_memory_size > 0) { + header += fmt::format("lmem[{}],", program.local_memory_size); + } header += "RC;" "LONG TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index fed79e381..de0be7aed 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -113,4 +113,12 @@ void EmitSetFragDepth([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Scalar throw NotImplementedException("GLASM instruction"); } +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { + ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { + ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index a74e422d6..e7af8fa88 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ 
b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -71,7 +71,7 @@ void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); -void EmitLoadLocal(EmitContext& ctx, ScalarU32 word_offset); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx); void EmitUndefU8(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 969b91a81..ae1735c8f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -168,14 +168,6 @@ void EmitYDirection(EmitContext& ctx) { NotImplemented(); } -void EmitLoadLocal(EmitContext& ctx, ScalarU32 word_offset) { - NotImplemented(); -} - -void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { - NotImplemented(); -} - void EmitUndefU1(EmitContext& ctx) { NotImplemented(); } From 3764750339fa60f2d79cf3abe1b91ca42ba61401 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:14:29 -0300 Subject: [PATCH 382/770] glasm: Add graphics specific shader declarations to GLASM --- .../backend/glasm/emit_context.cpp | 37 +++++++++++++++++++ .../backend/glasm/emit_glasm.cpp | 32 +++++++++++++--- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index f9d83dd91..66c954ff6 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -2,10 +2,25 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include + #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::GLASM { +namespace { +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + return ""; + case Interpolation::Flat: + return "FLAT "; + case Interpolation::NoPerspective: + return "NOPERSPECTIVE "; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} +} // Anonymous namespace EmitContext::EmitContext(IR::Program& program) { // FIXME: Temporary partial implementation @@ -42,6 +57,28 @@ EmitContext::EmitContext(IR::Program& program) { stage_name = "compute"; break; } + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + const auto& generic{program.info.input_generics[index]}; + if (generic.used) { + Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", + InterpDecorator(generic.interpolation), index, stage_name, index, index); + } + } + for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { + if (!program.info.stores_frag_color[index]) { + continue; + } + if (index == 0) { + Add("OUTPUT frag_color0=result.color;"); + } else { + Add("OUTPUT frag_color{}[]=result.color[{}];", index, index); + } + } + for (size_t index = 0; index < program.info.stores_generics.size(); ++index) { + if (program.info.stores_generics[index]) { + Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); + } + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0b70bf3f6..ab6790ce8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -261,6 +261,12 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } void SetupOptions(std::string& header, Info info) { + // TODO: Track the shared atomic ops + header += "OPTION NV_internal;" + "OPTION NV_shader_storage_buffer;" + "OPTION NV_gpu_program_fp64;" + "OPTION NV_bindless_texture;" + "OPTION ARB_derivative_control;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -276,10 +282,25 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_subgroup_shuffles) { header += "OPTION NV_shader_thread_shuffle;"; } - // TODO: Track the shared atomic ops - header += "OPTION NV_shader_storage_buffer;" - "OPTION NV_gpu_program_fp64;" - "OPTION NV_bindless_texture;"; +} + +std::string_view StageHeader(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + return "!!NVvp5.0\n"; + case Stage::TessellationControl: + return "!!NVtcs5.0\n"; + case Stage::TessellationEval: + return "!!NVtes5.0\n"; + case Stage::Geometry: + return "!!NVgp5.0\n"; + case Stage::Fragment: + return "!!NVfp5.0\n"; + case Stage::Compute: + return "!!NVcp5.0\n"; + } + throw InvalidArgument("Invalid stage {}", stage); } } // Anonymous namespace @@ -287,8 +308,7 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { EmitContext ctx{program}; Precolor(ctx, program); EmitCode(ctx, program); - std::string header = "!!NVcp5.0\n" - "OPTION NV_internal;"; + std::string header{StageHeader(program.stage)}; SetupOptions(header, program.info); switch (program.stage) { case Stage::Compute: From 31d402ee74d7f7045aec7e748fdee489a434db6b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:15:13 -0300 Subject: [PATCH 383/770] glasm: Add Void type to 
GLASM values --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 1 + src/shader_recompiler/backend/glasm/reg_alloc.cpp | 3 +++ src/shader_recompiler/backend/glasm/reg_alloc.h | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index ab6790ce8..e5c96eb7f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -54,6 +54,7 @@ public: } switch (value.type) { case Type::Register: + case Type::Void: break; case Type::U32: ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index f556f3aee..0e38f467f 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -57,6 +57,9 @@ void RegAlloc::FreeReg(Register reg) { Value RegAlloc::MakeImm(const IR::Value& value) { Value ret; switch (value.Type()) { + case IR::Type::Void: + ret.type = Type::Void; + break; case IR::Type::U1: ret.type = Type::U32; ret.imm_u32 = value.U1() ? 0xffffffff : 0; diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 5742436cf..ede6edd1f 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -23,6 +23,7 @@ namespace Shader::Backend::GLASM { class EmitContext; enum class Type : u32 { + Void, Register, U32, S32, @@ -65,6 +66,8 @@ struct Value { return false; } switch (type) { + case Type::Void: + return true; case Type::Register: return id == rhs.id; case Type::U32: @@ -218,6 +221,8 @@ struct fmt::formatter { template auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; case Shader::Backend::GLASM::Type::Register: return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: @@ -242,6 +247,8 @@ struct fmt::formatter { template auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; case Shader::Backend::GLASM::Type::Register: return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: @@ -266,6 +273,8 @@ struct fmt::formatter { template auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; case Shader::Backend::GLASM::Type::Register: return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: @@ -290,6 +299,8 @@ struct fmt::formatter { template auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { switch (value.type) { + case Shader::Backend::GLASM::Type::Void: + break; case Shader::Backend::GLASM::Type::Register: return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: From 1f3446b47e2945d2a7a18082e21bc417156c91d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:15:50 -0300 Subject: [PATCH 384/770] glasm: Implement some graphics instructions on GLASM --- .../backend/glasm/emit_glasm_context_get_set.cpp | 9 ++++----- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git 
a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index de0be7aed..2de7fb9cc 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -54,7 +54,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; - ctx.Add("MOV.F {}.x,in_attr{}.{};", inst, index, "xyzw"[element]); + ctx.Add("MOV.F {}.x,in_attr{}[0].{};", inst, index, "xyzw"[element]); return; } throw NotImplementedException("Get attribute {}", attr); @@ -66,7 +66,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.Add("MOV.F out_attr{}.{},{};", index, swizzle, value); + ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); return; } switch (attr) { @@ -100,9 +100,8 @@ void EmitSetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch throw NotImplementedException("GLASM instruction"); } -void EmitSetFragColor([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] u32 index, - [[maybe_unused]] u32 component, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { + ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); } void EmitSetSampleMask([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index ae1735c8f..388b7eb02 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -49,7 +49,7 @@ void EmitJoin(EmitContext& ctx) { } void EmitDemoteToHelperInvocation(EmitContext& ctx) { - NotImplemented(); + ctx.Add("KIL TR.x;"); } void EmitBarrier(EmitContext& ctx) { From 9fb2ea08e84f7194136f5a2f320dd3de04e8ca5d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:16:39 -0300 Subject: [PATCH 385/770] glasm: Initial (broken) implementation of TEX on GLASM --- .../backend/glasm/emit_glasm_image.cpp | 221 +++++++++++++ .../backend/glasm/emit_glasm_image_atomic.cpp | 165 ++++++++++ .../glasm/emit_glasm_not_implemented.cpp | 299 ------------------ 3 files changed, 386 insertions(+), 299 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e69de29bb..a32d01925 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -0,0 +1,221 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME + return fmt::format("texture[{}]", info.descriptor_index); +} + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coords, Register bias_lc, + [[maybe_unused]] const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view op{info.has_bias ? "TXB" : "TEX"}; + const std::string_view lod_clamp{info.has_lod_clamp ? ".LODCLAMP" : ""}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string texture{Texture(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + // FIXME + const bool separate{info.type == TextureType::ColorArrayCube}; + if (separate) { + ctx.Add("{}.F{}{} {},{},{},{},2D;", op, lod_clamp, sparse_mod, ret, coords, bias_lc, + texture); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "{}.F{}{} {},{},{},2D;", + coords, bias_lc, op, lod_clamp, sparse_mod, ret, coords, texture); + } + if (sparse_inst) { + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); + sparse_inst->Invalidate(); + } +} + +void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] Register lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, + [[maybe_unused]] Register dref, + [[maybe_unused]] Register bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, + [[maybe_unused]] Register dref, + [[maybe_unused]] Register lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2, + [[maybe_unused]] Register dref) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] Register offset, [[maybe_unused]] Register lod, + [[maybe_unused]] Register ms) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register lod) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] Register derivates, [[maybe_unused]] Register offset, + [[maybe_unused]] Register lod_clamp) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageRead([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] Register color) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp index e69de29bb..a0b9866de 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/glasm/emit_context.h" +#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLASM { + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitImageAtomicIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const 
IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicInc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicDec32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicExchange32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 388b7eb02..a4c1ca481 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -228,303 +228,4 @@ void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("SEQ.S {},{},0;", inst, value); } -void EmitBindlessImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void 
EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register bias_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register lod_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register dref, Register bias_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register dref, Register lod_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - const IR::Value& offset, const IR::Value& offset2) { - NotImplemented(); -} - -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - const IR::Value& offset, const IR::Value& offset2, Register dref) { - NotImplemented(); -} - -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - Register offset, Register lod, Register ms) { - NotImplemented(); -} - -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register lod) { - NotImplemented(); -} - -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords) { - NotImplemented(); -} - -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - Register derivates, Register offset, Register lod_clamp) { - NotImplemented(); -} - -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords) { - NotImplemented(); -} - -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, 
const IR::Value& index, Register coords, - Register color) { - NotImplemented(); -} - -void EmitBindlessImageAtomicIAdd32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicSMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicUMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicSMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicUMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicInc32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicDec32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicAnd32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicOr32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicXor32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicExchange32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicIAdd32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicSMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicUMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicSMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicUMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicInc32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicDec32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicAnd32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicOr32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicXor32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicExchange32(EmitContext&) { - NotImplemented(); -} - -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarS32 value) { - NotImplemented(); -} - -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarS32 value) { - NotImplemented(); -} - -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, - ScalarU32 value) { - NotImplemented(); -} - -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value) { - NotImplemented(); -} - } // namespace Shader::Backend::GLASM From 464f13fe0b65a6908086c91e0d3a97d74494a576 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:17:03 -0300 Subject: 
[PATCH 386/770] glasm: Implement derivative instructions on GLASM --- .../backend/glasm/emit_glasm_instructions.h | 8 ++++---- .../backend/glasm/emit_glasm_warp.cpp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index e7af8fa88..9f76fc6c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -611,9 +611,9 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU3 void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); -void EmitDPdxFine(EmitContext& ctx, ScalarF32 op_a); -void EmitDPdyFine(EmitContext& ctx, ScalarF32 op_a); -void EmitDPdxCoarse(EmitContext& ctx, ScalarF32 op_a); -void EmitDPdyCoarse(EmitContext& ctx, ScalarF32 op_a); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 37eb577cd..0f987daeb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -99,20 +99,20 @@ void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) { throw NotImplementedException("GLASM instruction"); } -void EmitDPdxFine(EmitContext&, ScalarF32) { - throw NotImplementedException("GLASM instruction"); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); } -void EmitDPdyFine(EmitContext&, ScalarF32) { - throw NotImplementedException("GLASM instruction"); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); } -void EmitDPdxCoarse(EmitContext&, ScalarF32) { - throw NotImplementedException("GLASM instruction"); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); } -void EmitDPdyCoarse(EmitContext&, ScalarF32) { - throw NotImplementedException("GLASM instruction"); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); } } // namespace Shader::Backend::GLASM From 38e7b8c80552a453580db641c20eba9e86cdf106 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:17:40 -0300 Subject: [PATCH 387/770] emit_spirv: Jump to loop body with local variable Silence unused variable warning --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 266ac690c..881a5dc4c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -136,7 +136,7 @@ void Traverse(EmitContext& ctx, IR::Program& program) { const Id endloop_label{node.loop.merge->Definition()}; ctx.OpLoopMerge(endloop_label, continue_label, 
spv::LoopControlMask::MaskNone); - ctx.OpBranch(node.loop.body->Definition()); + ctx.OpBranch(body_label); break; } case IR::AbstractSyntaxNode::Type::Break: { From 776ab3ea12f07e2d434a26857d412cff018b1b50 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 15 May 2021 18:18:31 -0300 Subject: [PATCH 388/770] shader: Use a non-trivial dummy to construct ASL node union --- src/shader_recompiler/frontend/ir/abstract_syntax_list.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h index 1366414c2..e9afb4d92 100644 --- a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -13,6 +13,10 @@ namespace Shader::IR { class Block; struct AbstractSyntaxNode { + struct NonTrivialDummy { + NonTrivialDummy() {} + }; + enum class Type { Block, If, @@ -25,7 +29,8 @@ }; Type type{}; union { - Block* block{}; + NonTrivialDummy dummy{}; + Block* block; struct { U1 cond; Block* body; From 258f2dec1bc6f1f9d966579c1efb96f76d947060 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 15 May 2021 18:19:08 -0300 Subject: [PATCH 389/770] opengl: Initial (broken) support for GLASM shaders --- .../renderer_opengl/gl_graphics_program.cpp | 15 +++++- .../renderer_opengl/gl_graphics_program.h | 6 ++- .../renderer_opengl/gl_shader_cache.cpp | 46 ++++++++++++++----- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index fd0958719..7c0bf7bc8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -33,10 +33,12 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, + std::array<OGLAssemblyProgram, 5> assembly_programs_, const std::array<const Shader::Info*, 5>& infos) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)} { + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); @@ -290,7 +292,16 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindProgram(program.handle); + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 5adf3f41e..58aa4b0bc 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -73,7 +73,9 @@ public: Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, const std::array<const Shader::Info*, 5>& infos); + OGLProgram program_, + std::array<OGLAssemblyProgram, 5> assembly_programs_, + const std::array<const Shader::Info*, 5>& infos); void Configure(bool is_indexed); @@ -86,6 +88,8 @@ private: StateTracker& state_tracker; OGLProgram program; + std::array<OGLAssemblyProgram, 5> assembly_programs; + std::array<Shader::Info, 5> stage_infos{}; std::array<u32, 5> base_uniform_bindings{}; std::array<u32, 5> base_storage_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d9f0bca78..c10ea2f60 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -185,6 +185,23 @@ GLenum Stage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -269,10 +286,12 @@ std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram( } std::array<const Shader::Info*, 5> infos{}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - + OGLProgram source_program; + std::array<OGLAssemblyProgram, 5> assembly_programs; Shader::Backend::Bindings binding; + if (!device.UseAssemblyShaders()) { + source_program.handle = glCreateProgram(); + } for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -282,15 +301,20 @@ std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - - const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; - AddShader(Stage(stage_index), gl_program.handle, code); + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + } else { + const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; + AddShader(Stage(stage_index), source_program.handle, code); + } } - LinkProgram(gl_program.handle); - - return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d, - 
program_manager, state_tracker, std::move(gl_program), - infos); + if (!device.UseAssemblyShaders()) { + LinkProgram(source_program.handle); + } + return std::make_unique<GraphicsProgram>( + texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + std::move(source_program), std::move(assembly_programs), infos); } std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram( From d4f9c798d652b0a225a47ab9032dd56dc49fa0d0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 15 May 2021 00:53:32 -0400 Subject: [PATCH 390/770] glasm: Implement rest of shared mem --- .../backend/glasm/emit_glasm_instructions.h | 12 ++--- .../glasm/emit_glasm_shared_memory.cpp | 52 ++++++++----------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 9f76fc6c2..ad640bcb9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -120,13 +120,13 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 of Register value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, Register value); -void EmitLoadSharedU8(EmitContext& ctx, ScalarU32 offset); -void EmitLoadSharedS8(EmitContext& ctx, ScalarU32 offset); -void EmitLoadSharedU16(EmitContext& ctx, ScalarU32 offset); -void EmitLoadSharedS16(EmitContext& ctx, ScalarU32 offset); -void EmitLoadSharedU32(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); -void EmitLoadSharedU128(EmitContext& ctx, ScalarU32 offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp index 32cc5d92c..c1498f449 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp @@ -8,57 +8,51 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { -void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw 
NotImplementedException("GLASM instruction"); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] ScalarU32 offset) { - ctx.LongAdd("LDS.U64 {},shared_mem[{}];", inst, offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); } -void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { + ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); } -void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); } -void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { + ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); } -void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] ScalarU32 value) { +void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); } -void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] Register value) { - ctx.Add("STS.U64 {},shared_mem[{}];", value, offset); +void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); } -void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, - [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { + ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); } } // namespace Shader::Backend::GLASM From f7a2340205b4fa2db32403f20d7b7afe32b15f33 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 16 May 2021 17:06:13 -0400 Subject: [PATCH 391/770] shader_recompiler: GCC fixes Fixes members of unnamed union not being accessible, and one function without a declaration. 
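
The union half, in miniature (a sketch of the pattern, not the exact code from the tree): GCC rejected accesses to members of the unnamed union through the enclosing struct, so the union is given a name and value-initialized through a named member instead:

    struct AbstractSyntaxNode {
        union Data {
            Block* block;
            // ... remaining alternatives elided
        };
        Data data{};  // named member replaces the anonymous union
        Type type{};
    };
    // Call sites change from node.block to node.data.block.

The declaration half marks the file-local Texture() helper in emit_glasm_image.cpp static, giving it internal linkage instead of an external definition with no prior declaration.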
--- .../backend/glasm/emit_glasm.cpp | 16 +++---- .../backend/glasm/emit_glasm_image.cpp | 2 +- .../backend/spirv/emit_spirv.cpp | 32 ++++++------- .../frontend/ir/abstract_syntax_list.h | 11 ++--- .../frontend/ir/post_order.cpp | 2 +- .../frontend/maxwell/program.cpp | 2 +- .../maxwell/structured_control_flow.cpp | 48 +++++++++---------- 7 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index e5c96eb7f..0a76423f4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -212,14 +212,14 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { for (const IR::AbstractSyntaxNode& node : program.syntax_list) { switch (node.type) { case IR::AbstractSyntaxNode::Type::Block: - for (IR::Inst& inst : node.block->Instructions()) { + for (IR::Inst& inst : node.data.block->Instructions()) { EmitInst(ctx, &inst); } break; case IR::AbstractSyntaxNode::Type::If: ctx.Add("MOV.S.CC RC,{};" "IF NE.x;", - eval(node.if_node.cond)); + eval(node.data.if_node.cond)); break; case IR::AbstractSyntaxNode::Type::EndIf: ctx.Add("ENDIF;"); @@ -228,8 +228,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("REP;"); break; case IR::AbstractSyntaxNode::Type::Repeat: - if (node.repeat.cond.IsImmediate()) { - if (node.repeat.cond.U1()) { + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { ctx.Add("ENDREP;"); } else { ctx.Add("BRK;" @@ -239,18 +239,18 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("MOV.S.CC RC,{};" "BRK (EQ.x);" "ENDREP;", - eval(node.repeat.cond)); + eval(node.data.repeat.cond)); } break; case IR::AbstractSyntaxNode::Type::Break: - if (node.break_node.cond.IsImmediate()) { - if (node.break_node.cond.U1()) { + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { ctx.Add("BRK;"); } } else { ctx.Add("MOV.S.CC RC,{};" "BRK (NE.x);", - eval(node.break_node.cond)); + eval(node.data.break_node.cond)); } break; case IR::AbstractSyntaxNode::Type::Return: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index a32d01925..4d146d34e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -105,7 +105,7 @@ void EmitBoundImageWrite(EmitContext&) { throw LogicError("Unreachable instruction"); } -std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, +static std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { // FIXME return fmt::format("texture[{}]", info.descriptor_index); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 881a5dc4c..9ed2af991 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -112,48 +112,48 @@ void Traverse(EmitContext& ctx, IR::Program& program) { for (const IR::AbstractSyntaxNode& node : program.syntax_list) { switch (node.type) { case IR::AbstractSyntaxNode::Type::Block: { - const Id label{node.block->Definition()}; + const Id label{node.data.block->Definition()}; if (current_block) { ctx.OpBranch(label); } - current_block = node.block; + current_block = node.data.block; ctx.AddLabel(label); - for 
(IR::Inst& inst : node.block->Instructions()) { + for (IR::Inst& inst : node.data.block->Instructions()) { EmitInst(ctx, &inst); } break; } case IR::AbstractSyntaxNode::Type::If: { - const Id if_label{node.if_node.body->Definition()}; - const Id endif_label{node.if_node.merge->Definition()}; + const Id if_label{node.data.if_node.body->Definition()}; + const Id endif_label{node.data.if_node.merge->Definition()}; ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - ctx.OpBranchConditional(ctx.Def(node.if_node.cond), if_label, endif_label); + ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); break; } case IR::AbstractSyntaxNode::Type::Loop: { - const Id body_label{node.loop.body->Definition()}; - const Id continue_label{node.loop.continue_block->Definition()}; - const Id endloop_label{node.loop.merge->Definition()}; + const Id body_label{node.data.loop.body->Definition()}; + const Id continue_label{node.data.loop.continue_block->Definition()}; + const Id endloop_label{node.data.loop.merge->Definition()}; ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); ctx.OpBranch(body_label); break; } case IR::AbstractSyntaxNode::Type::Break: { - const Id break_label{node.break_node.merge->Definition()}; - const Id skip_label{node.break_node.skip->Definition()}; - ctx.OpBranchConditional(ctx.Def(node.break_node.cond), break_label, skip_label); + const Id break_label{node.data.break_node.merge->Definition()}; + const Id skip_label{node.data.break_node.skip->Definition()}; + ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); break; } case IR::AbstractSyntaxNode::Type::EndIf: if (current_block) { - ctx.OpBranch(node.end_if.merge->Definition()); + ctx.OpBranch(node.data.end_if.merge->Definition()); } break; case IR::AbstractSyntaxNode::Type::Repeat: { - const Id loop_header_label{node.repeat.loop_header->Definition()}; - const Id merge_label{node.repeat.merge->Definition()}; - ctx.OpBranchConditional(ctx.Def(node.repeat.cond), loop_header_label, merge_label); + const Id loop_header_label{node.data.repeat.loop_header->Definition()}; + const Id merge_label{node.data.repeat.merge->Definition()}; + ctx.OpBranchConditional(ctx.Def(node.data.repeat.cond), loop_header_label, merge_label); break; } case IR::AbstractSyntaxNode::Type::Return: diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h index e9afb4d92..b61773487 100644 --- a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -13,10 +13,6 @@ namespace Shader::IR { class Block; struct AbstractSyntaxNode { - struct NonTrivialDummy { - NonTrivialDummy() {} - }; - enum class Type { Block, If, @@ -27,9 +23,7 @@ struct AbstractSyntaxNode { Return, Unreachable, }; - Type type{}; - union { - NonTrivialDummy dummy{}; + union Data { Block* block; struct { U1 cond; @@ -55,6 +49,9 @@ struct AbstractSyntaxNode { Block* skip; } break_node; }; + + Data data{}; + Type type{}; }; using AbstractSyntaxList = std::vector; diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index 1a28df7fb..16bc44101 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -20,7 +20,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) { if (root.type != AbstractSyntaxNode::Type::Block) { throw LogicError("First 
node in abstract syntax list root is not a block"); } - Block* const first_block{root.block}; + Block* const first_block{root.data.block}; visited.insert(first_block); block_stack.push_back(first_block); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 017c4b8fd..ccdab1dad 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -22,7 +22,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { })}; IR::BlockList blocks(std::ranges::distance(syntax_blocks)); std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.block; }); + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); return blocks; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 836d4b8aa..83554a953 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -655,8 +655,8 @@ public: syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*syntax_list.front().block}; - IR::IREmitter ir{first_block, first_block.begin()}; + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); ir.Prologue(); } @@ -670,7 +670,7 @@ private: current_block = block_pool.Create(inst_pool); auto& node{syntax_list.emplace_back()}; node.type = IR::AbstractSyntaxNode::Type::Block; - node.block = current_block; + node.data.block = current_block; }}; Tree& tree{parent.children}; for (auto it = tree.begin(); it != tree.end(); ++it) { @@ -713,24 +713,24 @@ private: const size_t then_block_index{syntax_list.size()}; Visit(stmt, break_block, merge_block); - IR::Block* const then_block{syntax_list.at(then_block_index).block}; + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; current_block->AddBranch(then_block); current_block->AddBranch(merge_block); current_block = merge_block; auto& if_node{syntax_list[if_node_index]}; if_node.type = IR::AbstractSyntaxNode::Type::If; - if_node.if_node.cond = cond; - if_node.if_node.body = then_block; - if_node.if_node.merge = merge_block; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; auto& endif_node{syntax_list.emplace_back()}; endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; - endif_node.end_if.merge = merge_block; + endif_node.data.end_if.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Loop: { @@ -740,7 +740,7 @@ private: } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; - header_node.block = loop_header_block; + header_node.data.block = loop_header_block; IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; @@ -757,7 +757,7 @@ private: const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; ir.DummyReference(cond); - IR::Block* const body_block{syntax_list.at(body_block_index).block}; + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); 
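// CFG wiring for the lowered loop, for orientation (sketch derived from this
// hunk): the header block branches into the body (edge added just above), the
// continue block branches back to the header (next line), and the Loop/Repeat
// syntax nodes filled in below reference the same body, continue and merge
// blocks so the backends can re-emit the structured loop.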
continue_block->AddBranch(loop_header_block); @@ -767,23 +767,23 @@ private: auto& loop{syntax_list[loop_node_index]}; loop.type = IR::AbstractSyntaxNode::Type::Loop; - loop.loop.body = body_block; - loop.loop.continue_block = continue_block; - loop.loop.merge = merge_block; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; auto& continue_block_node{syntax_list.emplace_back()}; continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; - continue_block_node.block = continue_block; + continue_block_node.data.block = continue_block; auto& repeat{syntax_list.emplace_back()}; repeat.type = IR::AbstractSyntaxNode::Type::Repeat; - repeat.repeat.cond = cond; - repeat.repeat.loop_header = loop_header_block; - repeat.repeat.merge = merge_block; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Break: { @@ -799,13 +799,13 @@ private: auto& break_node{syntax_list.emplace_back()}; break_node.type = IR::AbstractSyntaxNode::Type::Break; - break_node.break_node.cond = cond; - break_node.break_node.merge = break_block; - break_node.break_node.skip = skip_block; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = skip_block; + merge.data.block = skip_block; break; } case StatementType::Return: { @@ -824,7 +824,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = demote_block; + merge.data.block = demote_block; break; } case StatementType::Unreachable: { From 3c06293e20bde507ed1bc5dabf6c66cc443c2ed4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 20:33:57 -0300 Subject: [PATCH 392/770] emit_glasm: Add support for reading position attributes --- .../backend/glasm/emit_glasm_context_get_set.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 2de7fb9cc..9ce6c9214 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -51,13 +51,23 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, [[maybe_unused]] ScalarU32 vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const u32 element{IR::GenericAttributeElement(attr)}; - ctx.Add("MOV.F {}.x,in_attr{}[0].{};", inst, index, "xyzw"[element]); + ctx.Add("MOV.F {}.x,in_attr{}[0].{};", inst, index, swizzle); return; } - throw NotImplementedException("Get attribute {}", attr); + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.stage_name, swizzle); + break; + default: + throw NotImplementedException("Get attribute {}", 
attr); + } } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, From db2f0f410810d3d8310a6a476a8bcfd5e509869e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 May 2021 17:52:30 -0300 Subject: [PATCH 393/770] emit_glasm: Enable ARB_draw_buffers when needed --- src/shader_recompiler/backend/glasm/emit_context.cpp | 2 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 66c954ff6..4903e9d8e 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -71,7 +71,7 @@ EmitContext::EmitContext(IR::Program& program) { if (index == 0) { Add("OUTPUT frag_color0=result.color;"); } else { - Add("OUTPUT frag_color{}[]=result.color[{}];", index, index); + Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } for (size_t index = 0; index < program.info.stores_generics.size(); ++index) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0a76423f4..0c591f73c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -283,6 +283,10 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_subgroup_shuffles) { header += "OPTION NV_shader_thread_shuffle;"; } + const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; + if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { + header += "OPTION ARB_draw_buffers;"; + } } std::string_view StageHeader(Stage stage) { From bf2949df100d43f3d54ca74a028aa59678ba76c8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 May 2021 17:54:43 -0300 Subject: [PATCH 394/770] glasm: Improve texture sampling instructions --- .../backend/glasm/emit_glasm_image.cpp | 66 ++++++++++++------- .../backend/glasm/emit_glasm_instructions.h | 54 +++++++-------- 2 files changed, 70 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 4d146d34e..7f2cf052a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -112,24 +112,46 @@ static std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInf } void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register bias_lc, + const IR::Value& coord, Register bias_lc, [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; - const std::string_view op{info.has_bias ? "TXB" : "TEX"}; - const std::string_view lod_clamp{info.has_lod_clamp ? ".LODCLAMP" : ""}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view lod_clamp_mod{info.has_lod_clamp ? 
".LODCLAMP" : ""}; + const std::string_view type{"2D"}; // FIXME const std::string texture{Texture(ctx, info, index)}; + + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } const Register ret{ctx.reg_alloc.Define(inst)}; - // FIXME - const bool separate{info.type == TextureType::ColorArrayCube}; - if (separate) { - ctx.Add("{}.F{}{} {},{},{},{},2D;", op, lod_clamp, sparse_mod, ret, coords, bias_lc, - texture); + if (info.has_bias) { + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE;", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture); + } else { + if (info.has_lod_clamp) { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type); + } else { + ctx.Add("MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type); + } + } } else { - ctx.Add("MOV.F {}.w,{}.x;" - "{}.F{}{} {},{},{},2D;", - coords, bias_lc, op, lod_clamp, sparse_mod, ret, coords, texture); + if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE;", sparse_mod, ret, coord_vec, bias_lc, + texture); + } else { + ctx.Add("TEX.F{}{} {},{},{},{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type); + } } if (sparse_inst) { const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; @@ -142,7 +164,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] Register lod_lc, + [[maybe_unused]] Register coord, [[maybe_unused]] Register lod_lc, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLASM instruction"); } @@ -150,8 +172,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, - [[maybe_unused]] Register dref, + [[maybe_unused]] Register coord, [[maybe_unused]] Register dref, [[maybe_unused]] Register bias_lc, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLASM instruction"); @@ -160,22 +181,21 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, - [[maybe_unused]] Register dref, + [[maybe_unused]] Register coord, [[maybe_unused]] Register dref, [[maybe_unused]] Register lod_lc, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLASM instruction"); } void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] const IR::Value& offset2) { throw 
NotImplementedException("GLASM instruction"); } void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] const IR::Value& offset2, [[maybe_unused]] Register dref) { @@ -183,7 +203,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: } void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, [[maybe_unused]] Register offset, [[maybe_unused]] Register lod, [[maybe_unused]] Register ms) { throw NotImplementedException("GLASM instruction"); @@ -196,24 +216,24 @@ void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused] } void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) { + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) { throw NotImplementedException("GLASM instruction"); } void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, [[maybe_unused]] Register derivates, [[maybe_unused]] Register offset, [[maybe_unused]] Register lod_clamp) { throw NotImplementedException("GLASM instruction"); } void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords) { + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) { throw NotImplementedException("GLASM instruction"); } void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, [[maybe_unused]] Register color) { throw NotImplementedException("GLASM instruction"); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index ad640bcb9..a128f9ac4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -525,28 +525,28 @@ void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register bias_lc, const IR::Value& offset); + const IR::Value& coord, Register bias_lc, const IR::Value& offset); void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register lod_lc, const IR::Value& offset); + Register coord, Register lod_lc, const IR::Value& offset); void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, Register dref, Register bias_lc, + Register coord, Register dref, Register bias_lc, const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 
- Register coords, Register dref, Register lod_lc, + Register coord, Register dref, Register lod_lc, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, const IR::Value& offset, const IR::Value& offset2); -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, const IR::Value& offset, const IR::Value& offset2, Register dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register offset, Register lod, Register ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register lod); -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register derivates, Register offset, Register lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); @@ -570,28 +570,28 @@ void EmitBoundImageAtomicAnd32(EmitContext&); void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value); -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarS32 value); -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value); -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarS32 value); -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value); -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarS32 value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + ScalarU32 value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, 
Register coord, ScalarU32 value); -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value); -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value); -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value); -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coords, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, ScalarU32 value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coords, ScalarU32 value); + Register coord, ScalarU32 value); void EmitLaneId(EmitContext& ctx, IR::Inst& inst); void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); From e6b4d461d2424d4fc46eeb3601a5b25cd850c2af Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 May 2021 18:00:31 -0300 Subject: [PATCH 395/770] glasm: Add support for texture offsets --- .../backend/glasm/emit_glasm_image.cpp | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 7f2cf052a..c395248ed 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -120,7 +120,10 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const std::string_view lod_clamp_mod{info.has_lod_clamp ? 
".LODCLAMP" : ""}; const std::string_view type{"2D"}; // FIXME const std::string texture{Texture(ctx, info, index)}; - + std::string offset_vec; + if (!offset.IsEmpty()) { + offset_vec = fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); + } std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; if (coord.InstRecursive()->HasUses()) { // Move non-dead coords to a separate register, although this should never happen because @@ -131,26 +134,27 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const Register ret{ctx.reg_alloc.Define(inst)}; if (info.has_bias) { if (info.type == TextureType::ColorArrayCube) { - ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE;", lod_clamp_mod, sparse_mod, ret, coord_vec, - bias_lc, texture); + ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); } else { if (info.has_lod_clamp) { ctx.Add("MOV.F {}.w,{}.x;" - "TXB.F.LODCLAMP{} {},{},{}.y,{},{};", - coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type); + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); } else { ctx.Add("MOV.F {}.w,{}.x;" - "TXB.F{} {},{},{},{};", - coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type); + "TXB.F{} {},{},{},{}{};", + coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); } } } else { if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { - ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE;", sparse_mod, ret, coord_vec, bias_lc, - texture); + ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, + bias_lc, texture, offset_vec); } else { - ctx.Add("TEX.F{}{} {},{},{},{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, - type); + ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, + type, offset_vec); } } if (sparse_inst) { From bee9fb0563f2bffd08df6f78091233fb3fc8d4ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 May 2021 18:01:09 -0300 Subject: [PATCH 396/770] glasm: Reorder unreachable image instructions to the bottom --- .../backend/glasm/emit_glasm_image.cpp | 194 +++++++++--------- 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index c395248ed..247118dff 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -9,103 +9,7 @@ namespace Shader::Backend::GLASM { -void EmitBindlessImageSampleImplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageSampleExplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageGather(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageGatherDref(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageFetch(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageQueryDimensions(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void 
EmitBindlessImageQueryLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageGradient(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageRead(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageWrite(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageSampleImplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageSampleExplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageGather(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageGatherDref(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageFetch(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageQueryDimensions(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageQueryLod(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageGradient(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageRead(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageWrite(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -static std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, +std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { // FIXME return fmt::format("texture[{}]", info.descriptor_index); @@ -242,4 +146,100 @@ void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst throw NotImplementedException("GLASM instruction"); } +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw 
LogicError("Unreachable instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::GLASM From c42a6143a55e202974f6ea03e4eb752151c754c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 May 2021 18:06:37 -0300 Subject: [PATCH 397/770] glasm: Add support for non-2D texture samples --- .../backend/glasm/emit_glasm_image.cpp | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 247118dff..2af5483d9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -8,21 +8,43 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { - +namespace { std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { // FIXME return fmt::format("texture[{}]", info.descriptor_index); } +std::string_view TextureType(IR::TextureInstInfo info) { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; + case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} +} // Anonymous namespace + void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - const IR::Value& coord, Register bias_lc, - [[maybe_unused]] const IR::Value& offset) { + const IR::Value& coord, Register bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; - const std::string_view type{"2D"}; // FIXME + const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; std::string offset_vec; if (!offset.IsEmpty()) { From ec6fc5fe78c9038fc9ad7259b7b3a7be751ecef6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 02:52:01 -0300 Subject: [PATCH 398/770] glasm: Implement TEX and TEXS instructions Remove lod clamp from texture instructions with lod, as this is not needed (nor supported). 
--- .../backend/glasm/emit_glasm.cpp | 3 + .../backend/glasm/emit_glasm_image.cpp | 284 +++++++++++++++--- .../backend/glasm/emit_glasm_instructions.h | 6 +- .../backend/glasm/emit_glasm_memory.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 8 +- .../backend/spirv/emit_spirv_instructions.h | 4 +- .../frontend/ir/ir_emitter.cpp | 11 +- .../frontend/ir/ir_emitter.h | 5 +- .../maxwell/translate/impl/texture_fetch.cpp | 5 +- .../translate/impl/texture_fetch_swizzled.cpp | 18 +- 10 files changed, 276 insertions(+), 70 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0c591f73c..d7a08e4b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -283,6 +283,9 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_subgroup_shuffles) { header += "OPTION NV_shader_thread_shuffle;"; } + if (info.uses_sparse_residency) { + header += "OPTION EXT_sparse_texture2;"; + } const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { header += "OPTION ARB_draw_buffers;"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 2af5483d9..3d76ab315 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -9,6 +9,34 @@ namespace Shader::Backend::GLASM { namespace { +struct ScopedRegister { + ScopedRegister() = default; + ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} + + ~ScopedRegister() { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + } + + ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { + if (reg_alloc) { + reg_alloc->FreeReg(reg); + } + reg_alloc = std::exchange(rhs.reg_alloc, nullptr); + reg = rhs.reg; + } + + ScopedRegister(ScopedRegister&& rhs) noexcept + : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} + + ScopedRegister& operator=(const ScopedRegister&) = delete; + ScopedRegister(const ScopedRegister&) = delete; + + RegAlloc* reg_alloc{}; + Register reg; +}; + std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { // FIXME @@ -36,6 +64,61 @@ std::string_view TextureType(IR::TextureInstInfo info) { } throw InvalidArgument("Invalid texture type {}", info.type.Value()); } + +std::string_view ShadowTextureType(IR::TextureInstInfo info) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + throw InvalidArgument("Invalid texture type {}", info.type.Value()); +} + +std::string Offset(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsEmpty()) { + return ""; + } + return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); +} + +std::pair Coord(EmitContext& ctx, const IR::Value& coord) { + if (coord.IsImmediate()) { + ScopedRegister scoped_reg(ctx.reg_alloc); + return {fmt::to_string(scoped_reg.reg), 
std::move(scoped_reg)}; + } + std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; + if (coord.InstRecursive()->HasUses()) { + // Move non-dead coords to a separate register, although this should never happen because + // vectors are only assembled for immediate texture instructions + ctx.Add("MOV.F RC,{};", coord_vec); + coord_vec = "RC"; + } + return {std::move(coord_vec), ScopedRegister{}}; +} + +void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { + if (!sparse_inst) { + return; + } + const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; + ctx.Add("MOV.S {},-1;" + "MOV.S {}(NONRESIDENT),0;", + sparse_ret, sparse_ret); + sparse_inst->Invalidate(); +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, @@ -46,17 +129,8 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; - std::string offset_vec; - if (!offset.IsEmpty()) { - offset_vec = fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); - } - std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; - if (coord.InstRecursive()->HasUses()) { - // Move non-dead coords to a separate register, although this should never happen because - // vectors are only assembled for immediate texture instructions - ctx.Add("MOV.F RC,{};", coord_vec); - coord_vec = "RC"; - } + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; const Register ret{ctx.reg_alloc.Define(inst)}; if (info.has_bias) { if (info.type == TextureType::ColorArrayCube) { @@ -83,38 +157,172 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu type, offset_vec); } } - if (sparse_inst) { - const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; - ctx.Add("MOV.S {},-1;" - "MOV.S {}(NONRESIDENT),0;", - sparse_ret, sparse_ret); - sparse_inst->Invalidate(); + StoreSparse(ctx, sparse_inst); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, + offset_vec); + } else { + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); } + StoreSparse(ctx, sparse_inst); } -void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coord, [[maybe_unused]] Register lod_lc, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLASM instruction"); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 dref, Register bias_lc, + const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{ShadowTextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.has_bias) { + if (info.has_lod_clamp) { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", + coord_vec, dref, coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, + texture, type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + break; + default: + throw NotImplementedException("Invalid type {} with bias and lod clamp", + info.type.Value()); + } + } else { + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{}.x;" + "TXB.F{} {},{},{},{}{};", + coord_vec, dref, coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, + type, offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXB.F{} {},{},{},{},{}{};", + coord_vec, dref, sparse_mod, ret, coord_vec, bias_lc, texture, type, + offset_vec); + break; + case TextureType::ColorArrayCube: { + const ScopedRegister pair{ctx.reg_alloc}; + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{}.x;" + "TXB.F{} {},{},{},{},{}{};", + pair.reg, dref, pair.reg, bias_lc, sparse_mod, ret, coord_vec, pair.reg, + texture, type, offset_vec); + break; + } + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + } + } else { + if (info.has_lod_clamp) { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 
'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + coord_vec, dref_swizzle, dref, sparse_mod, ret, coord_vec, bias_lc, texture, + type, offset_vec); + } else { + const ScopedRegister pair{ctx.reg_alloc}; + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TEX.F.LODCLAMP{} {},{},{},{},{}{};", + pair.reg, dref, pair.reg, bias_lc, sparse_mod, ret, coord_vec, pair.reg, + texture, type, offset_vec); + } + } else { + if (info.type != TextureType::ColorArrayCube) { + const bool w_swizzle{info.type == TextureType::ColorArray2D || + info.type == TextureType::ColorCube}; + const char dref_swizzle{w_swizzle ? 'w' : 'z'}; + ctx.Add("MOV.F {}.{},{};" + "TEX.F{} {},{},{},{}{};", + coord_vec, dref_swizzle, dref, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + } else { + const ScopedRegister pair{ctx.reg_alloc}; + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref, texture, + type, offset_vec); + } + } + } + StoreSparse(ctx, sparse_inst); } -void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coord, [[maybe_unused]] Register dref, - [[maybe_unused]] Register bias_lc, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coord, [[maybe_unused]] Register dref, - [[maybe_unused]] Register lod_lc, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLASM instruction"); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, ScalarF32 dref, ScalarF32 lod, + const IR::Value& offset) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{ShadowTextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};" + "MOV.F {}.w,{};" + "TXL.F{} {},{},{},{}{};", + coord_vec, dref, coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, + offset_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};" + "TXL.F{} {},{},{},{},{}{};", + coord_vec, dref, sparse_mod, ret, coord_vec, lod, texture, type, offset_vec); + break; + case TextureType::ColorArrayCube: { + const ScopedRegister pair{ctx.reg_alloc}; + ctx.Add("MOV.F {}.x,{};" + "MOV.F {}.y,{};" + "TXL.F{} {},{},{},{},{}{};", + pair.reg, dref, pair.reg, lod, sparse_mod, ret, coord_vec, pair.reg, texture, type, + offset_vec); + break; + } + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + StoreSparse(ctx, sparse_inst); } void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index a128f9ac4..54e7fab3c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -527,12 +527,12 @@ void EmitBoundImageWrite(EmitContext&); void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, Register bias_lc, const IR::Value& offset); void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coord, Register lod_lc, const IR::Value& offset); + const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coord, Register dref, Register bias_lc, + const IR::Value& coord, ScalarF32 dref, Register bias_lc, const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register coord, Register dref, Register lod_lc, + const IR::Value& coord, ScalarF32 dref, ScalarF32 lod, const IR::Value& offset); void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, const IR::Value& offset, const IR::Value& offset2); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 0c6a6e1c8..dd307a9a3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -38,7 +38,7 @@ void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueTy void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, ret), + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), fmt::format("MOV.U {},{{0,0,0,0}};", ret)); } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 
5832104df..99b883746 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -337,9 +337,9 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& } Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset) { + Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + const ImageOperands operands(ctx, false, true, false, lod, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); @@ -356,9 +356,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset) { + Id coords, Id dref, Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + const ImageOperands operands(ctx, false, true, false, lod, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 0a2b31772..22260d2a9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -491,11 +491,11 @@ Id EmitBoundImageWrite(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, const IR::Value& offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset); + Id lod, const IR::Value& offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id bias_lc, const IR::Value& offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset); + Id coords, Id dref, Id lod, const IR::Value& offset); Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index def29143e..94bdbe39c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1778,12 +1778,10 @@ Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords } Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, - const Value& offset, const F32& lod_clamp, - TextureInstInfo info) { - const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Value& offset, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? 
Opcode::BoundImageSampleExplicitLod : Opcode::BindlessImageSampleExplicitLod}; - return Inst(op, Flags{info}, handle, coords, lod_lc, offset); + return Inst(op, Flags{info}, handle, coords, lod, offset); } F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, @@ -1796,12 +1794,11 @@ F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coor } F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& lod, const Value& offset, const F32& lod_clamp, + const F32& lod, const Value& offset, TextureInstInfo info) { - const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod : Opcode::BindlessImageSampleDrefExplicitLod}; - return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); + return Inst(op, Flags{info}, handle, coords, dref, lod, offset); } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4f7c820fe..4ae69b788 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -302,15 +302,14 @@ public: const F32& lod_clamp, TextureInstInfo info); [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); + TextureInstInfo info); [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& bias, const Value& offset, const F32& lod_clamp, TextureInstInfo info); [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& lod, - const Value& offset, const F32& lod_clamp, - TextureInstInfo info); + const Value& offset, TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 9671d115e..0046b5edd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -177,14 +177,13 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, const IR::Value sample{[&]() -> IR::Value { if (tex.dc == 0) { if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); } else { return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); } } if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); } else { return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 3500a4559..154e7f1a1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp 
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -81,18 +81,18 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { switch (texs.encoding) { case 0: // 1D.LZ info.type.Assign(TextureType::Color1D); - return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); case 1: // 2D info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); case 2: // 2D.LZ info.type.Assign(TextureType::Color2D); - return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); case 3: // 2D.LL CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, - {}, info); + info); case 4: // 2D.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); @@ -105,13 +105,13 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), - v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + v.F(reg_b + 1), v.F(reg_b), {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), - zero, {}, {}, info); + zero, {}, info); case 7: // ARRAY_2D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorArray2D); @@ -123,7 +123,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::ColorArray2D); return v.ir.ImageSampleExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - zero, {}, {}, info); + zero, {}, info); case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); @@ -131,7 +131,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - v.F(reg_b + 1), zero, {}, {}, info); + v.F(reg_b + 1), zero, {}, info); case 10: // 3D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); @@ -141,7 +141,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, - {}, info); + info); case 12: // CUBE CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorCube); @@ -152,7 +152,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_b, 2); info.type.Assign(TextureType::ColorCube); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), - v.F(reg_b + 1), {}, {}, info); + v.F(reg_b + 1), {}, info); default: throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); } From f1b334b9f950fa58e2cb0e27574b06a0622a99f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 04:44:13 -0300 Subject: [PATCH 399/770] glasm: Remove unintentional comma on vector insert --- src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp 
b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index 22321f386..d829f05b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -52,7 +52,7 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec // The input composite is not aliased with the return value so we have to copy it before // hand. But the insert object is not aliased with the return value, so we don't have to // worry about that - ctx.Add("MOV.{} {},{};MOV.{},{}.{},{};", type, ret, composite, type, ret, swizzle, object); + ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object); } else { // The return value is alised so we can just insert the object, it doesn't matter if it's // aliased From fb3ba62b3a47ad645b007d5031ed9f8aaa7cb5c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 19:24:09 -0300 Subject: [PATCH 400/770] glasm: Fix aliased bitcasts ref counting --- .../glasm/emit_glasm_bitwise_conversion.cpp | 10 ++--- .../backend/glasm/reg_alloc.cpp | 39 +++++++++++++++---- .../backend/glasm/reg_alloc.h | 6 +++ 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index a6c66b826..cdbf6e93e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -12,12 +12,10 @@ static void Alias(IR::Inst& inst, const IR::Value& value) { if (value.IsImmediate()) { return; } - IR::Inst* const value_inst{value.InstRecursive()}; - if (inst.GetOpcode() == IR::Opcode::Identity) { - value_inst->DestructiveAddUsage(inst.UseCount()); - value_inst->DestructiveRemoveUsage(); - } - inst.SetDefinition(value_inst->Definition()); + IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition()); } void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 0e38f467f..707b22247 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -30,9 +30,10 @@ Value RegAlloc::Consume(const IR::Value& value) { } void RegAlloc::Unref(IR::Inst& inst) { - inst.DestructiveRemoveUsage(); - if (!inst.HasUses()) { - Free(inst.Definition()); + IR::Inst& value_inst{AliasInst(inst)}; + value_inst.DestructiveRemoveUsage(); + if (!value_inst.HasUses()) { + Free(value_inst.Definition()); } } @@ -99,10 +100,7 @@ Value RegAlloc::PeekInst(IR::Inst& inst) { } Value RegAlloc::ConsumeInst(IR::Inst& inst) { - inst.DestructiveRemoveUsage(); - if (!inst.HasUses()) { - Free(inst.Definition()); - } + Unref(inst); return PeekInst(inst); } @@ -138,4 +136,31 @@ void RegAlloc::Free(Id id) { } } +/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::Identity: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastU32F32: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::BitCastF32U32: + case IR::Opcode::BitCastF64U64: + return true; + default: + return false; + } +} + +/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { + IR::Inst* 
it{&inst}; + while (IsAliased(*it)) { + const IR::Value arg{it->Arg(0)}; + if (arg.IsImmediate()) { + break; + } + it = arg.InstRecursive(); + } + return *it; +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index ede6edd1f..41b7c92be 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -126,6 +126,12 @@ public: return num_used_long_registers; } + /// Returns true if the instruction is expected to be aliased to another + static bool IsAliased(const IR::Inst& inst); + + /// Returns the underlying value out of an alias sequence + static IR::Inst& AliasInst(IR::Inst& inst); + private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; From afcb14018542825e23f392698b93fb4a9a4b4a8b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 19:25:01 -0300 Subject: [PATCH 401/770] glasm: Add missing return value on move assignment --- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 3d76ab315..77da3400b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -25,6 +25,7 @@ struct ScopedRegister { } reg_alloc = std::exchange(rhs.reg_alloc, nullptr); reg = rhs.reg; + return *this; } ScopedRegister(ScopedRegister&& rhs) noexcept From 4bad415bcab581c55770a16ef2e8fe1e6d105ff1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 19:25:19 -0300 Subject: [PATCH 402/770] glasm: Implement InstanceId and VertexId --- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 9ce6c9214..ceadb3333 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -65,6 +65,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::PositionW: ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.stage_name, swizzle); break; + case IR::Attribute::InstanceId: + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.stage_name); + break; + case IR::Attribute::VertexId: + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.stage_name); + break; default: throw NotImplementedException("Get attribute {}", attr); } From 9bb3e008c9f4bbdd35c095b506c3a3312d17e383 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 02:04:22 -0300 Subject: [PATCH 403/770] shader: Read branch conditions from an instruction Fixes the identity removal pass. 
--- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 2 +- .../backend/glasm/emit_glasm_bitwise_conversion.cpp | 4 ++++ .../backend/glasm/emit_glasm_instructions.h | 3 ++- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- src/shader_recompiler/backend/glasm/reg_alloc.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 10 +++++++++- .../backend/spirv/emit_spirv_instructions.h | 3 ++- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +++- src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 ++- src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++- .../frontend/maxwell/structured_control_flow.cpp | 9 +++------ 12 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index d7a08e4b3..a893fa3fb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -200,7 +200,7 @@ void Precolor(EmitContext& ctx, const IR::Program& program) { } // Add reference to the phi node on the phi predecessor to avoid overwritting it for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.DummyReference(IR::Value{&phi}); + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); } } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index cdbf6e93e..505378bfd 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -22,6 +22,10 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } +void EmitConditionRef(EmitContext&, IR::Inst& inst, const IR::Value& value) { + Alias(inst, value); +} + void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 54e7fab3c..df0933a3f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -22,7 +22,8 @@ class EmitContext; void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitDummyReference(EmitContext&); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); void EmitJoin(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index a4c1ca481..015cb5576 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -21,7 +21,7 @@ void EmitPhi(EmitContext&, IR::Inst&) {} void EmitVoid(EmitContext&) {} -void EmitDummyReference(EmitContext&) {} +void EmitReference(EmitContext&) {} void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { if (phi == value) { diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp 
b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 707b22247..1a88331b4 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -139,6 +139,7 @@ void RegAlloc::Free(Id id) { /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::Identity: + case IR::Opcode::ConditionRef: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastU32F32: case IR::Opcode::BitCastU64F64: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9ed2af991..3e20ac3b9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -469,7 +469,15 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return id; } -void EmitDummyReference(EmitContext&) {} +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { + const Id id{ctx.Def(value)}; + if (!Sirit::ValidId(id)) { + throw NotImplementedException("Forward identity declaration"); + } + return id; +} + +void EmitReference(EmitContext&) {} void EmitPhiMove(EmitContext&) { throw LogicError("Unreachable instruction"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 22260d2a9..1181e7b4f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -23,7 +23,8 @@ class EmitContext; Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitDummyReference(EmitContext&); +Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 94bdbe39c..e9fd41237 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,8 +61,12 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::DummyReference(const Value& value) { - Inst(Opcode::DummyReference, value); +U1 IREmitter::ConditionRef(const U1& value) { + return Inst(Opcode::ConditionRef, value); +} + +void IREmitter::Reference(const Value& value) { + Inst(Opcode::Reference, value); } void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4ae69b788..bb3500c54 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,7 +32,9 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void DummyReference(const Value& value); + U1 ConditionRef(const U1& value); + void Reference(const Value& value); + void PhiMove(IR::Inst& phi, const Value& value); void Prologue(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 267aebc61..3dfa5a880 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,7 +56,8 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch 
(op) { - case Opcode::DummyReference: + case Opcode::ConditionRef: + case Opcode::Reference: case Opcode::PhiMove: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6196b867d..8a8d0d759 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -6,7 +6,8 @@ OPCODE(Phi, Opaque, ) OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) -OPCODE(DummyReference, Void, Opaque, ) +OPCODE(ConditionRef, U1, U1, ) +OPCODE(Reference, Void, Opaque, ) OPCODE(PhiMove, Void, Opaque, Opaque, ) // Special operations diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 83554a953..ebe5c2654 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -703,8 +703,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -754,8 +753,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -791,8 +789,7 @@ private: IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; From 1ee7f8b943d1ab6ac6dec18bae6c2be3fd8d246c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 04:04:06 -0300 Subject: [PATCH 404/770] glasm: Do not alias ConditionRef for now Immediate condition refs were not handled correctly. Just move the value for now.
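For context on why aliasing breaks here: Alias() forwards the IR value untouched, so an immediate condition (a bare literal) reaches later consumers that expect a register operand, while the MOV.S path always materializes the value into a register first. The following minimal C++ sketch illustrates that distinction; Value, Alias, and MoveToRegister are simplified stand-ins invented for illustration, not the real RegAlloc/Value interfaces of the backend.

#include <cstdio>
#include <string>
#include <variant>

// Hypothetical stand-in for the backend's IR value: either an immediate
// integer or an already-allocated register name.
using Value = std::variant<int, std::string>;

// Aliasing forwards the value unchanged: an immediate stays an immediate,
// and later code that formats a register operand receives a bare literal.
Value Alias(const Value& value) {
    return value;
}

// Emitting a MOV always yields a register operand, which is what every
// consumer of a condition reference can handle.
std::string MoveToRegister(const Value& value, int& next_reg) {
    const std::string reg{"R" + std::to_string(next_reg++)};
    if (const int* imm = std::get_if<int>(&value)) {
        std::printf("MOV.S %s.x,%d;\n", reg.c_str(), *imm);
    } else {
        std::printf("MOV.S %s.x,%s;\n", reg.c_str(), std::get<std::string>(value).c_str());
    }
    return reg;
}

int main() {
    int next_reg{};
    const Value immediate_cond{-1}; // GLASM-style "true"
    // Alias(immediate_cond) would leave a bare "-1" where a register is
    // required; moving first gives a uniform register operand instead.
    const std::string reg{MoveToRegister(immediate_cond, next_reg)};
    std::printf("# condition now lives in %s.x\n", reg.c_str());
}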
--- .../backend/glasm/emit_glasm_bitwise_conversion.cpp | 4 ++-- src/shader_recompiler/backend/glasm/reg_alloc.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 505378bfd..808c72105 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -22,8 +22,8 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } -void EmitConditionRef(EmitContext&, IR::Inst& inst, const IR::Value& value) { - Alias(inst, value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)}); } void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 1a88331b4..707b22247 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -139,7 +139,6 @@ void RegAlloc::Free(Id id) { /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::Identity: - case IR::Opcode::ConditionRef: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastU32F32: case IR::Opcode::BitCastU64F64: From 0fa421f82f4efbb7fe2d86a18427b4889fd8d855 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 19:43:17 -0300 Subject: [PATCH 405/770] glasm: Implement gl_FragDepth and gl_SampleMask stores --- .../backend/glasm/emit_glasm_context_get_set.cpp | 8 ++++---- .../backend/glasm/emit_glasm_instructions.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index ceadb3333..0b6d853eb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -120,12 +120,12 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 valu ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); } -void EmitSetSampleMask([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { + ctx.Add("MOV.S result.samplemask.x,{};", value); } -void EmitSetFragDepth([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { + ctx.Add("MOV.F result.depth.z,{};", value); } void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index df0933a3f..ce0181fd2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -56,7 +56,7 @@ void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value void EmitGetPatch(EmitContext& ctx, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); void 
EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); -void EmitSetSampleMask(EmitContext& ctx, ScalarF32 value); +void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); From 3d3ed53511dcd338806a97bfff2afa1644ddb424 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 19:43:52 -0300 Subject: [PATCH 406/770] glasm: Implement textureGather instructions --- .../backend/glasm/emit_glasm_image.cpp | 103 ++++++++++++++++-- .../backend/glasm/emit_glasm_instructions.h | 9 +- 2 files changed, 97 insertions(+), 15 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 77da3400b..9da4da331 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" @@ -95,6 +97,33 @@ std::string Offset(EmitContext& ctx, const IR::Value& offset) { return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); } +std::pair AllocOffsetsRegs(EmitContext& ctx, + const IR::Value& offset2) { + if (offset2.IsEmpty()) { + return {}; + } else { + return {ctx.reg_alloc, ctx.reg_alloc}; + } +} + +void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, + const IR::Value& offset2) { + const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; + const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; + // Input swizzle: [XYXY] [XYXY] + // Output swizzle: [XXXX] [YYYY] + ctx.Add("MOV {}.x,{}.x;" + "MOV {}.y,{}.z;" + "MOV {}.z,{}.x;" + "MOV {}.w,{}.z;" + "MOV {}.x,{}.y;" + "MOV {}.y,{}.w;" + "MOV {}.z,{}.y;" + "MOV {}.w,{}.w;", + off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, + offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); +} + std::pair Coord(EmitContext& ctx, const IR::Value& coord) { if (coord.IsImmediate()) { ScopedRegister scoped_reg(ctx.reg_alloc); @@ -326,19 +355,71 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: StoreSparse(ctx, sparse_inst); } -void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] const IR::Value& offset2) { - throw NotImplementedException("GLASM instruction"); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags()}; + const char comp{"xyzw"[info.gather_component]}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, + offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, + texture, comp, type); + } + StoreSparse(ctx, sparse_inst); } -void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] const IR::Value& offset2, - [[maybe_unused]] Register dref) { - throw NotImplementedException("GLASM instruction"); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref) { + // FIXME: This instruction is not working as expected + + // Allocate offsets early so they don't overwrite any consumed register + const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{ShadowTextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + std::string args; + switch (info.type) { + case TextureType::Color2D: + ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArray2D: + case TextureType::ColorCube: + ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); + args = fmt::to_string(coord_vec); + break; + case TextureType::ColorArrayCube: + args = fmt::format("{},{}", coord_vec, dref_value); + break; + default: + throw NotImplementedException("Invalid type {}", info.type.Value()); + } + if (offset2.IsEmpty()) { + const std::string offset_vec{Offset(ctx, offset)}; + ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); + } else { + SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); + ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, + type); + } + StoreSparse(ctx, sparse_inst); } void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index ce0181fd2..53e9bdac7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -535,10 +535,11 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, ScalarF32 dref, ScalarF32 lod, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, - const IR::Value& offset, const 
IR::Value& offset2); -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, - const IR::Value& offset, const IR::Value& offset2, Register dref); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, + const IR::Value& dref); void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register offset, Register lod, Register ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, From 8b7d5912d61d56f65fb7e3a03bba544a4c40bfa6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:04:09 -0300 Subject: [PATCH 407/770] glasm: Support textures used in more than one stage --- .../backend/glasm/emit_context.cpp | 10 +++++++++- src/shader_recompiler/backend/glasm/emit_context.h | 14 +++++++++++++- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 4 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 4903e9d8e..d1fe84a5f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -4,6 +4,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" @@ -22,7 +23,7 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program) { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{program.info} { // FIXME: Temporary partial implementation u32 cbuf_index{}; for (const auto& desc : program.info.constant_buffer_descriptors) { @@ -79,6 +80,13 @@ EmitContext::EmitContext(IR::Program& program) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } + const size_t num_textures{program.info.texture_descriptors.size()}; + texture_bindings.resize(num_textures); + for (size_t index = 0; index < num_textures; ++index) { + const auto& desc{program.info.texture_descriptors[index]}; + texture_bindings[index] = bindings.texture; + bindings.texture += desc.count; + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 4efe42ada..084635c77 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -6,11 +6,20 @@ #include #include +#include #include #include "shader_recompiler/backend/glasm/reg_alloc.h" +namespace Shader { +struct Info; +} + +namespace Shader::Backend { +struct Bindings; +} + namespace Shader::IR { class Inst; struct Program; @@ -20,7 +29,7 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(IR::Program& program, Bindings& bindings); template void Add(const char* format_str, IR::Inst& inst, Args&&... 
args) { @@ -45,6 +54,9 @@ public: std::string code; RegAlloc reg_alloc{*this}; + const Info& info; + + std::vector texture_bindings; std::string_view stage_name = "invalid"; }; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index a893fa3fb..edf6f5e13 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -312,8 +312,8 @@ std::string_view StageHeader(Stage stage) { } } // Anonymous namespace -std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { - EmitContext ctx{program}; +std::string EmitGLASM(const Profile&, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c10ea2f60..b84b36b9d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -302,7 +302,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, program, binding)}; From c560bf99c273e21f53387635515a08f5d50b8583 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:04:27 -0300 Subject: [PATCH 408/770] glasm: Implement gl_FrontFacing attribute --- .../backend/glasm/emit_glasm_context_get_set.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 0b6d853eb..8c5cd286d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -71,6 +71,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::VertexId: ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.stage_name); break; + case IR::Attribute::FrontFace: + ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.stage_name); + break; default: throw NotImplementedException("Get attribute {}", attr); } From 0a42277a4f7ffc822b334781467ed4588335ba02 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:05:46 -0300 Subject: [PATCH 409/770] glasm: Implement TXQ and improve texture info reads --- .../backend/glasm/emit_glasm_image.cpp | 99 ++++++++++--------- .../backend/glasm/emit_glasm_instructions.h | 2 +- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 9da4da331..7b95505e2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -40,52 +40,51 @@ struct ScopedRegister { Register reg; }; -std::string Texture([[maybe_unused]] EmitContext& ctx, IR::TextureInstInfo info, +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { - // FIXME - return fmt::format("texture[{}]", info.descriptor_index); + // FIXME: indexed
reads + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); } std::string_view TextureType(IR::TextureInstInfo info) { - switch (info.type) { - case TextureType::Color1D: - return "1D"; - case TextureType::ColorArray1D: - return "ARRAY1D"; - case TextureType::Color2D: - return "2D"; - case TextureType::ColorArray2D: - return "ARRAY2D"; - case TextureType::Color3D: - return "3D"; - case TextureType::ColorCube: - return "CUBE"; - case TextureType::ColorArrayCube: - return "ARRAYCUBE"; - case TextureType::Buffer: - return "BUFFER"; - } - throw InvalidArgument("Invalid texture type {}", info.type.Value()); -} - -std::string_view ShadowTextureType(IR::TextureInstInfo info) { - switch (info.type) { - case TextureType::Color1D: - return "SHADOW1D"; - case TextureType::ColorArray1D: - return "SHADOWARRAY1D"; - case TextureType::Color2D: - return "SHADOW2D"; - case TextureType::ColorArray2D: - return "SHADOWARRAY2D"; - case TextureType::Color3D: - return "SHADOW3D"; - case TextureType::ColorCube: - return "SHADOWCUBE"; - case TextureType::ColorArrayCube: - return "SHADOWARRAYCUBE"; - case TextureType::Buffer: - return "SHADOWBUFFER"; + if (info.is_depth) { + switch (info.type) { + case TextureType::Color1D: + return "SHADOW1D"; + case TextureType::ColorArray1D: + return "SHADOWARRAY1D"; + case TextureType::Color2D: + return "SHADOW2D"; + case TextureType::ColorArray2D: + return "SHADOWARRAY2D"; + case TextureType::Color3D: + return "SHADOW3D"; + case TextureType::ColorCube: + return "SHADOWCUBE"; + case TextureType::ColorArrayCube: + return "SHADOWARRAYCUBE"; + case TextureType::Buffer: + return "SHADOWBUFFER"; + } + } else { + switch (info.type) { + case TextureType::Color1D: + return "1D"; + case TextureType::ColorArray1D: + return "ARRAY1D"; + case TextureType::Color2D: + return "2D"; + case TextureType::ColorArray2D: + return "ARRAY2D"; + case TextureType::Color3D: + return "3D"; + case TextureType::ColorCube: + return "CUBE"; + case TextureType::ColorArrayCube: + return "ARRAYCUBE"; + case TextureType::Buffer: + return "BUFFER"; + } } throw InvalidArgument("Invalid texture type {}", info.type.Value()); } @@ -217,7 +216,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const auto info{inst.Flags()}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; - const std::string_view type{ShadowTextureType(info)}; + const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; const std::string offset_vec{Offset(ctx, offset)}; const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; @@ -319,7 +318,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const auto info{inst.Flags()}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; - const std::string_view type{ShadowTextureType(info)}; + const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; const std::string offset_vec{Offset(ctx, offset)}; const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; @@ -389,7 +388,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde const auto info{inst.Flags()}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; - const std::string_view type{ShadowTextureType(info)}; + const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; const Register coord_vec{ctx.reg_alloc.Consume(coord)}; const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; @@ -429,10 +428,12 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst throw NotImplementedException("GLASM instruction"); } -void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register lod) { - throw NotImplementedException("GLASM instruction"); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + ScalarF32 lod) { + const auto info{inst.Flags()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); } void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 53e9bdac7..17bec1d58 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -543,7 +543,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register offset, Register lod, Register ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - Register lod); + ScalarF32 lod); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register derivates, Register offset, Register lod_clamp); From e8ed90480518c200c8beaefa0ddb2bbb164a429c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:06:19 -0300 Subject: [PATCH 410/770] glasm: Implement gl_WorkGroupID --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 17bec1d58..7893f1c2a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -66,7 +66,7 @@ void EmitSetZFlag(EmitContext& ctx); void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); -void EmitWorkgroupId(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 015cb5576..f8a7e2781 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -144,8 +144,8 @@ void EmitSetOFlag(EmitContext& ctx) { NotImplemented(); } -void EmitWorkgroupId(EmitContext& ctx) { - NotImplemented(); +void 
EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {},invocation.groupid;", inst); } void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { From 85fc7e584ef9d64bae3269e7993bbf919bd10640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:07:18 -0300 Subject: [PATCH 411/770] HACK: Bind stages before and after bindings Works around a bug where program parameters are only applied to the current stage, and the affected stage wasn't bound at the time. Affects all SSBO usages on GLASM. --- .../renderer_opengl/gl_graphics_program.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 7c0bf7bc8..4ac026502 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -240,6 +240,17 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + // FIXME: Unhack this + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; From 2e0d56da7e9c294f690590a1db6067f3034af665 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:30:09 -0300 Subject: [PATCH 412/770] glasm: Implement SelectU1 --- .../backend/glasm/emit_glasm_instructions.h | 3 ++- src/shader_recompiler/backend/glasm/emit_glasm_select.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 7893f1c2a..8a707d924 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -179,7 +179,8 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); -void EmitSelectU1(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value); void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp index b9e5cbbbe..68fff613c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp @@ -9,9 +9,9 @@ namespace Shader::Backend::GLASM { -void EmitSelectU1([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, - [[maybe_unused]]
ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { - throw NotImplementedException("GLASM instruction"); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, + ScalarS32 false_value) { + ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); } void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, From 98ed8ff103a975be93dbbb89d554fbea355aacde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:30:24 -0300 Subject: [PATCH 413/770] glasm: Implement BFE.CC --- .../backend/glasm/emit_glasm_integer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 1befeca91..ba69c7a1b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -122,6 +122,14 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal "BFE.U {},RC,{};", count, offset, ret, base); } + if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + zero->Invalidate(); + } + if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + sign->Invalidate(); + } } void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { From 3777592ada560e2943a066f7d2d62fd468dbf62c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 00:29:07 -0400 Subject: [PATCH 414/770] glasm: Implement IADD.CC --- .../backend/glasm/emit_glasm_integer.cpp | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index ba69c7a1b..ff353df8d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -9,7 +9,32 @@ namespace Shader::Backend::GLASM { void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { - ctx.Add("ADD.S {}.x,{},{};", inst, a, b); + const bool cc{inst.HasAssociatedPseudoOperation()}; + const std::string_view cc_mod{cc ? 
".CC" : ""}; + if (cc) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); + if (!cc) { + return; + } + static constexpr std::array masks{"EQ", "SF", "CF", "OF"}; + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i]) { + const auto flag_ret{ctx.reg_alloc.Define(*flags[i])}; + ctx.Add("MOV.S {},0;" + "MOV.S {}({}.x),-1;", + flag_ret, flag_ret, masks[i]); + flags[i]->Invalidate(); + } + } } void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, From 4a22942f452dd1bba5e10d20217a0bfbf214dacb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 02:05:24 -0300 Subject: [PATCH 415/770] glasm: Implement ImageFetch --- .../backend/glasm/emit_context.cpp | 13 +++++--- .../backend/glasm/emit_context.h | 1 + .../backend/glasm/emit_glasm_image.cpp | 33 +++++++++++++++---- .../backend/glasm/emit_glasm_instructions.h | 4 +-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index d1fe84a5f..e2182400c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -80,11 +80,14 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{progra Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } - const size_t num_textures{program.info.texture_descriptors.size()}; - texture_bindings.resize(num_textures); - for (size_t index = 0; index < num_textures; ++index) { - const auto& desc{program.info.texture_descriptors[index]}; - texture_bindings[index] = bindings.texture; + texture_buffer_bindings.reserve(program.info.texture_buffer_descriptors.size()); + for (const auto& desc : program.info.texture_buffer_descriptors) { + texture_buffer_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } + texture_bindings.reserve(program.info.texture_descriptors.size()); + for (const auto& desc : program.info.texture_descriptors) { + texture_bindings.push_back(bindings.texture); bindings.texture += desc.count; } } diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 084635c77..d6b0bf73c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -56,6 +56,7 @@ public: RegAlloc reg_alloc{*this}; const Info& info; + std::vector texture_buffer_bindings; std::vector texture_bindings; std::string_view stage_name = "invalid"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 7b95505e2..333a003c9 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -43,7 +43,11 @@ struct ScopedRegister { std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { // FIXME: indexed reads - return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + if (info.type == TextureType::Buffer) { + return 
fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); + } } std::string_view TextureType(IR::TextureInstInfo info) { @@ -421,11 +425,28 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde StoreSparse(ctx, sparse_inst); } -void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, - [[maybe_unused]] Register offset, [[maybe_unused]] Register lod, - [[maybe_unused]] Register ms) { - throw NotImplementedException("GLASM instruction"); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{Offset(ctx, offset)}; + const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (info.type == TextureType::Buffer) { + ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else if (ms.type != Type::Void) { + ctx.Add("MOV.S {}.w,{};" + "TXFMS.F{} {},{},{},{}{};", + coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } else { + ctx.Add("MOV.S {}.w,{};" + "TXF.F{} {},{},{},{}{};", + coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 8a707d924..119b166af 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -541,8 +541,8 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, const IR::Value& dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, - Register offset, Register lod, Register ms); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ScalarF32 lod); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); From 12dcb9fcc256d016a46e15a61d139878fdd5caec Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 02:12:38 -0300 Subject: [PATCH 416/770] glasm: Implement ImageQueryLod --- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 333a003c9..747af84fe 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ 
b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -457,9 +457,11 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); } -void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) { - throw NotImplementedException("GLASM instruction"); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags()}; + const std::string texture{Texture(ctx, info, index)}; + const std::string_view type{TextureType(info)}; + ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); } void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 9415c435fc447f2cd5a9e5d92401f08931ac1261 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 02:25:53 -0300 Subject: [PATCH 417/770] glasm: Implement gl_PointCoord --- .../backend/glasm/emit_glasm_context_get_set.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 8c5cd286d..7bbb3e1af 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -65,6 +65,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::PositionW: ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.stage_name, swizzle); break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.stage_name, swizzle); + break; case IR::Attribute::InstanceId: ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.stage_name); break; From 2494dbe183660297d9b66d4300a28ca95f740f08 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 02:26:32 -0300 Subject: [PATCH 418/770] glasm: Implement gl_PointSize stores --- .../backend/glasm/emit_glasm_context_get_set.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 7bbb3e1af..f362dd2c8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -93,6 +93,9 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, return; } switch (attr) { + case IR::Attribute::PointSize: + ctx.Add("MOV.F result.pointsize.x,{};", value); + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: From accad56ee7cc9f77886d164701a35f1e89a3519b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 16:32:03 -0300 Subject: [PATCH 419/770] glasm: Implement stores to gl_ViewportIndex --- .../backend/glasm/emit_context.cpp | 4 +++- .../backend/glasm/emit_context.h | 8 ++++++-- .../backend/glasm/emit_glasm.cpp | 16 ++++++++++++---- .../backend/glasm/emit_glasm_context_get_set.cpp | 8 ++++++++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e2182400c..395ac87f2 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -23,7 
+23,8 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{program.info} { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) + : info{program.info}, profile{profile_} { // FIXME: Temporary partial implementation u32 cbuf_index{}; for (const auto& desc : program.info.constant_buffer_descriptors) { @@ -41,6 +42,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{progra if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); } + stage = program.stage; switch (program.stage) { case Stage::VertexA: case Stage::VertexB: diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index d6b0bf73c..dd1f9ac9f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -11,10 +11,12 @@ #include #include "shader_recompiler/backend/glasm/reg_alloc.h" +#include "shader_recompiler/stage.h" namespace Shader { struct Info; -} +struct Profile; +} // namespace Shader namespace Shader::Backend { struct Bindings; @@ -29,7 +31,7 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program, Bindings& bindings); + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { @@ -55,10 +57,12 @@ public: std::string code; RegAlloc reg_alloc{*this}; const Info& info; + const Profile& profile; std::vector texture_buffer_bindings; std::vector texture_bindings; + Stage stage{}; std::string_view stage_name = "invalid"; }; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index edf6f5e13..9dc0cacbe 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -261,7 +261,10 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } -void SetupOptions(std::string& header, Info info) { +void SetupOptions(const IR::Program& program, const Profile& profile, std::string& header) { + const Info& info{program.info}; + const Stage stage{program.stage}; + // TODO: Track the shared atomic ops header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" @@ -286,6 +289,11 @@ void SetupOptions(std::string& header, Info info) { if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } + if ((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) { + if (profile.support_viewport_index_layer_non_geometry) { + header += "OPTION NV_viewport_array2;"; + } + } const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { header += "OPTION ARB_draw_buffers;"; @@ -312,12 +320,12 @@ std::string_view StageHeader(Stage stage) { } } // Anonymous namespace -std::string EmitGLASM(const Profile&, IR::Program& program, Bindings& bindings) { - EmitContext ctx{program, bindings}; +std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings, profile}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; - SetupOptions(header, 
program.info); + SetupOptions(program, profile, header); switch (program.stage) { case Stage::Compute: header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index f362dd2c8..6484387bc 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -102,6 +103,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, case IR::Attribute::PositionW: ctx.Add("MOV.F result.position.{},{};", swizzle, value); break; + case IR::Attribute::ViewportIndex: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewport.x,{};", value); + } else { + // LOG_WARNING + } + break; default: throw NotImplementedException("Set attribute {}", attr); } From 91a3c2c1c095880a3582f9362943db84b40064f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 16:32:21 -0300 Subject: [PATCH 420/770] glasm: Implement PREC on relevant instructions --- .../glasm/emit_glasm_floating_point.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index d2c324ad6..4ed58619d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { @@ -42,6 +43,11 @@ void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, "MIN.{} {}.x,RC.x,{};", type, min_value, value, type, ret, max_value); } + +std::string_view Precise(IR::Inst& inst) { + const bool precise{inst.Flags().no_contraction}; + return precise ? 
".PREC" : ""; +} } // Anonymous namespace void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -63,11 +69,11 @@ void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { - ctx.Add("ADD.F {}.x,{},{};", inst, a, b); + ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); } void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { - ctx.LongAdd("ADD.F64 {}.x,{},{};", inst, a, b); + ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -77,11 +83,11 @@ void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { - ctx.Add("MAD.F {}.x,{},{},{};", inst, a, b, c); + ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); } void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { - ctx.LongAdd("MAD.F64 {}.x,{},{},{};", inst, a, b, c); + ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); } void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { @@ -106,11 +112,11 @@ void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { - ctx.Add("MUL.F {}.x,{},{};", inst, a, b); + ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); } void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { - ctx.LongAdd("MUL.F64 {}.x,{},{};", inst, a, b); + ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); } void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { From 96962c1d3c638d7cb15c89dcc9933b03237c03f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 16:32:38 -0300 Subject: [PATCH 421/770] glasm: Fix phi instruction types --- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index f8a7e2781..066ed1012 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -29,7 +29,7 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) } const Register phi_reg{ctx.reg_alloc.Consume(phi)}; const Value eval_value{ctx.reg_alloc.Consume(value)}; - switch (phi.InstRecursive()->Arg(0).Type()) { + switch (phi.Inst()->Flags()) { case IR::Type::U1: case IR::Type::U32: case IR::Type::F32: From b60b3fa11389f9aca72bc00a4ab686b3bdb668f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 16:51:28 -0300 Subject: [PATCH 422/770] glasm: Fix compute stage name --- src/shader_recompiler/backend/glasm/emit_context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 395ac87f2..463930a18 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ 
b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -57,7 +57,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile stage_name = "fragment"; break; case Stage::Compute: - stage_name = "compute"; + stage_name = "invocation"; break; } for (size_t index = 0; index < program.info.input_generics.size(); ++index) { From d957b3a8fef35ec0c2797e5ff0793f7236c08a3f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 16:51:52 -0300 Subject: [PATCH 423/770] glasm: Implement barriers --- .../backend/glasm/emit_glasm_not_implemented.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 066ed1012..f7fec0545 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -53,15 +53,15 @@ void EmitDemoteToHelperInvocation(EmitContext& ctx) { } void EmitBarrier(EmitContext& ctx) { - NotImplemented(); + ctx.Add("BAR;"); } void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { - NotImplemented(); + ctx.Add("MEMBAR.CTA;"); } void EmitDeviceMemoryBarrier(EmitContext& ctx) { - NotImplemented(); + ctx.Add("MEMBAR;"); } void EmitPrologue(EmitContext& ctx) { From 291f220be37d5fed36906b4fce977a5e4e23f481 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 19 May 2021 17:09:29 -0300 Subject: [PATCH 424/770] glasm: Implement 64-bit shifts --- .../backend/glasm/emit_glasm_instructions.h | 8 +++++--- .../backend/glasm/emit_glasm_integer.cpp | 18 +++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 119b166af..94e545ad4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -308,11 +308,13 @@ void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); -void EmitShiftLeftLogical64(EmitContext& ctx, Register base, Register shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); -void EmitShiftRightLogical64(EmitContext& ctx, Register base, Register shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift); void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); -void EmitShiftRightArithmetic64(EmitContext& ctx, Register base, Register shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift); void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index ff353df8d..f75fcba47 100644 --- 
a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -75,27 +75,27 @@ void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Sc ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); } -void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, - [[maybe_unused]] Register shift) { - throw NotImplementedException("GLASM instruction"); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift); } void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); } -void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, - [[maybe_unused]] Register shift) { - throw NotImplementedException("GLASM instruction"); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarU32 shift) { + ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); } void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); } -void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register base, - [[maybe_unused]] Register shift) { - throw NotImplementedException("GLASM instruction"); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, + ScalarS32 shift) { + ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); } void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { From f79cbbf814b679f4302283852081faabec1316e8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 01:46:47 -0300 Subject: [PATCH 425/770] glasm: Implement ImageGradient --- .../backend/glasm/emit_glasm_image.cpp | 67 +++++++++++++++++-- .../backend/glasm/emit_glasm_instructions.h | 5 +- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 747af84fe..ab5a694fd 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -127,6 +127,27 @@ void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR:: offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); } +std::string GradOffset(const IR::Value& offset) { + if (offset.IsImmediate()) { + // LOG_WARNING immediate + return ""; + } + IR::Inst* const vector{offset.InstRecursive()}; + if (!vector->AreAllArgsImmediates()) { + // LOG_WARNING elements not immediate + return ""; + } + switch (vector->NumArgs()) { + case 1: + return fmt::format(",({})", static_cast(vector->Arg(0).U32())); + case 2: + return fmt::format(",({},{})", static_cast(vector->Arg(0).U32()), + static_cast(vector->Arg(1).U32())); + default: + throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); + } +} + std::pair Coord(EmitContext& ctx, const IR::Value& coord) { if (coord.IsImmediate()) { ScopedRegister scoped_reg(ctx.reg_alloc); @@ -464,11 +485,47 @@ void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); } -void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& 
index, [[maybe_unused]] Register coord, - [[maybe_unused]] Register derivates, [[maybe_unused]] Register offset, - [[maybe_unused]] Register lod_clamp) { - throw NotImplementedException("GLASM instruction"); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp) { + const auto info{inst.Flags()}; + ScopedRegister dpdx, dpdy; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + if (multi_component) { + // Allocate this early to avoid aliasing other registers + dpdx = ScopedRegister{ctx.reg_alloc}; + dpdy = ScopedRegister{ctx.reg_alloc}; + } + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string texture{Texture(ctx, info, index)}; + const std::string offset_vec{GradOffset(offset)}; + const Register coord_vec{ctx.reg_alloc.Consume(coord)}; + const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + if (multi_component) { + ctx.Add("MOV.F {}.x,{}.x;" + "MOV.F {}.y,{}.z;" + "MOV.F {}.x,{}.y;" + "MOV.F {}.y,{}.w;", + dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, + dpdy.reg, derivatives_vec); + if (info.has_lod_clamp) { + const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; + ctx.Add("MOV.F {}.w,{};" + "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", + dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } else { + ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, + texture, type, offset_vec); + } + } else { + ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, + derivatives_vec, texture, type, offset_vec); + } + StoreSparse(ctx, sparse_inst); } void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 94e545ad4..7ccba4c6f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -548,8 +548,9 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, ScalarF32 lod); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, - Register derivates, Register offset, Register lod_clamp); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + const IR::Value& coord, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, Register color); From 3d0ffc6ad077174e2af9f3923fd99ced6f5e373b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 01:49:26 -0300 Subject: [PATCH 426/770] glasm: Implement EmitVertex and EndPrimitive --- .../backend/glasm/emit_glasm_instructions.h | 2 +- 
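// [Editor's sketch; not part of the patch series] In the EmitImageGradient above, the IR
// packs the derivatives interleaved as (dPdx.x, dPdy.x, dPdx.y, dPdy.y), so the
// multi-component path splits them into two registers before TXD. Assuming ret=R0,
// coord=R1, derivatives=R2, dpdx=R3, dpdy=R4 and binding texture[0], the emitted GLASM
// for a plain 2D gradient sample would look roughly like:
//   MOV.F R3.x,R2.x; MOV.F R3.y,R2.z;   # dPdx
//   MOV.F R4.x,R2.y; MOV.F R4.y,R2.w;   # dPdy
//   TXD.F R0,R1,R3,R4,texture[0],2D;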
.../backend/glasm/emit_glasm_not_implemented.cpp | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 7ccba4c6f..0ce00db67 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -32,7 +32,7 @@ void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index f7fec0545..9b962427c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -72,12 +72,16 @@ void EmitEpilogue(EmitContext& ctx) { // TODO } -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - NotImplemented(); +void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { + ctx.Add("EMITS {};", stream); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { - NotImplemented(); + if (!stream.IsImmediate()) { + // LOG_WARNING not immediate + } + ctx.reg_alloc.Consume(stream); + ctx.Add("ENDPRIM;"); } void EmitGetRegister(EmitContext& ctx) { From dadd192b30d547dfa078057796a5ae16820eb4be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 02:18:52 -0300 Subject: [PATCH 427/770] glasm: Implement ImageRead --- .../backend/glasm/emit_context.cpp | 10 +++++ .../backend/glasm/emit_context.h | 2 + .../backend/glasm/emit_glasm.cpp | 3 +- .../backend/glasm/emit_glasm_image.cpp | 45 +++++++++++++++++-- 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 463930a18..8f418936e 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -82,6 +82,16 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } + image_buffer_bindings.reserve(program.info.image_buffer_descriptors.size()); + for (const auto& desc : program.info.image_buffer_descriptors) { + image_buffer_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + image_bindings.reserve(program.info.image_descriptors.size()); + for (const auto& desc : program.info.image_descriptors) { + image_bindings.push_back(bindings.image); + bindings.image += desc.count; + } texture_buffer_bindings.reserve(program.info.texture_buffer_descriptors.size()); for (const auto& desc : program.info.texture_buffer_descriptors) { texture_buffer_bindings.push_back(bindings.texture); diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index dd1f9ac9f..d5bab48ea 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -60,7 +60,9 @@ public: const Profile& profile; std::vector 
texture_buffer_bindings; + std::vector image_buffer_bindings; std::vector texture_bindings; + std::vector image_bindings; Stage stage{}; std::string_view stage_name = "invalid"; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 9dc0cacbe..3910d00ee 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -270,7 +270,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;" "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;"; + "OPTION ARB_derivative_control;" + "OPTION EXT_shader_image_load_formatted;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index ab5a694fd..beee9cf06 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -50,6 +50,16 @@ std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, } } +std::string Image(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + // FIXME: indexed reads + if (info.type == TextureType::Buffer) { + return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); + } +} + std::string_view TextureType(IR::TextureInstInfo info) { if (info.is_depth) { switch (info.type) { @@ -173,6 +183,28 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { sparse_ret, sparse_ret); sparse_inst->Invalidate(); } + +std::string_view FormatStorage(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return "U"; + case ImageFormat::R8_UINT: + return "U8"; + case ImageFormat::R8_SINT: + return "S8"; + case ImageFormat::R16_UINT: + return "U16"; + case ImageFormat::R16_SINT: + return "S16"; + case ImageFormat::R32_UINT: + return "U32"; + case ImageFormat::R32G32_UINT: + return "U32X2"; + case ImageFormat::R32G32B32A32_UINT: + return "U32X4"; + } + throw InvalidArgument("Invalid image format {}", format); +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, @@ -528,9 +560,16 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, StoreSparse(ctx, sparse_inst); } -void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord) { - throw NotImplementedException("GLASM instruction"); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { + const auto info{inst.Flags()}; + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view sparse_mod{sparse_inst ? 
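// [Editor's note; not part of the patch] FormatStorage above maps the image format
// recorded in the IR to the storage-format suffix that LOADIM/STOREIM require; the
// Typeless case yielding plain "U" only works because this same commit adds
// "OPTION EXT_shader_image_load_formatted;" to SetupOptions. For an R32_UINT 2D image
// bound at slot 1, the LOADIM line just below would produce, e.g.:
//   LOADIM.U32 R0,R1,image[1],2D;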
".SPARSE" : ""}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + const Register ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); + StoreSparse(ctx, sparse_inst); } void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 8fdb00a2b5fccb2378497dc7229639851c60f02b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 02:40:58 -0300 Subject: [PATCH 428/770] glasm: Implement ImageWrite --- .../backend/glasm/emit_glasm_image.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index beee9cf06..ed0b65e16 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -572,10 +572,13 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Reg StoreSparse(ctx, sparse_inst); } -void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coord, - [[maybe_unused]] Register color) { - throw NotImplementedException("GLASM instruction"); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, + Register color) { + const auto info{inst.Flags()}; + const std::string_view format{FormatStorage(info.image_format)}; + const std::string_view type{TextureType(info)}; + const std::string image{Image(ctx, info, index)}; + ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); } void EmitBindlessImageSampleImplicitLod(EmitContext&) { From 0a54291c9c750d89feacef0644959326b9612b64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 15:58:39 -0300 Subject: [PATCH 429/770] glasm: Fix potential aliasing bug on cube array samples --- .../backend/glasm/emit_glasm_image.cpp | 71 +++++++++++-------- .../backend/glasm/emit_glasm_instructions.h | 8 +-- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index ed0b65e16..385ca51ac 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -268,9 +268,16 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - const IR::Value& coord, ScalarF32 dref, Register bias_lc, - const IR::Value& offset) { + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset) { + // Allocate early to avoid aliases const auto info{inst.Flags()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; const std::string_view type{TextureType(info)}; @@ -287,14 +294,14 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: ctx.Add("MOV.F {}.z,{};" "MOV.F {}.w,{}.x;" "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", - coord_vec, dref, coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, - texture, type, offset_vec); + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + bias_lc_vec, texture, type, offset_vec); break; case TextureType::ColorArray2D: case TextureType::ColorCube: ctx.Add("MOV.F {}.w,{};" "TXB.F.LODCLAMP{} {},{},{},{},{}{};", - coord_vec, dref, sparse_mod, ret, coord_vec, bias_lc, texture, type, + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, offset_vec); break; default: @@ -309,25 +316,23 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: ctx.Add("MOV.F {}.z,{};" "MOV.F {}.w,{}.x;" "TXB.F{} {},{},{},{}{};", - coord_vec, dref, coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, - type, offset_vec); + coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, + texture, type, offset_vec); break; case TextureType::ColorArray2D: case TextureType::ColorCube: ctx.Add("MOV.F {}.w,{};" "TXB.F{} {},{},{},{},{}{};", - coord_vec, dref, sparse_mod, ret, coord_vec, bias_lc, texture, type, + coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, offset_vec); break; - case TextureType::ColorArrayCube: { - const ScopedRegister pair{ctx.reg_alloc}; + case TextureType::ColorArrayCube: ctx.Add("MOV.F {}.x,{};" "MOV.F {}.y,{}.x;" "TXB.F{} {},{},{},{},{}{};", - pair.reg, dref, pair.reg, bias_lc, sparse_mod, ret, coord_vec, pair.reg, - texture, type, offset_vec); + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); break; - } default: throw NotImplementedException("Invalid type {}", info.type.Value()); } @@ -340,15 +345,14 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const char dref_swizzle{w_swizzle ? 'w' : 'z'}; ctx.Add("MOV.F {}.{},{};" "TEX.F.LODCLAMP{} {},{},{},{},{}{};", - coord_vec, dref_swizzle, dref, sparse_mod, ret, coord_vec, bias_lc, texture, - type, offset_vec); + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, + texture, type, offset_vec); } else { - const ScopedRegister pair{ctx.reg_alloc}; ctx.Add("MOV.F {}.x,{};" "MOV.F {}.y,{};" "TEX.F.LODCLAMP{} {},{},{},{},{}{};", - pair.reg, dref, pair.reg, bias_lc, sparse_mod, ret, coord_vec, pair.reg, - texture, type, offset_vec); + staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); } } else { if (info.type != TextureType::ColorArrayCube) { @@ -357,11 +361,10 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const char dref_swizzle{w_swizzle ? 
'w' : 'z'}; ctx.Add("MOV.F {}.{},{};" "TEX.F{} {},{},{},{}{};", - coord_vec, dref_swizzle, dref, sparse_mod, ret, coord_vec, texture, type, - offset_vec); + coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, + type, offset_vec); } else { - const ScopedRegister pair{ctx.reg_alloc}; - ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref, texture, + ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, type, offset_vec); } } @@ -370,9 +373,16 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: } void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - const IR::Value& coord, ScalarF32 dref, ScalarF32 lod, - const IR::Value& offset) { + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset) { + // Allocate early to avoid aliases const auto info{inst.Flags()}; + ScopedRegister staging; + if (info.type == TextureType::ColorArrayCube) { + staging = ScopedRegister{ctx.reg_alloc}; + } + const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; + const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; @@ -387,24 +397,23 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: ctx.Add("MOV.F {}.z,{};" "MOV.F {}.w,{};" "TXL.F{} {},{},{},{}{};", - coord_vec, dref, coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, + coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, offset_vec); break; case TextureType::ColorArray2D: case TextureType::ColorCube: ctx.Add("MOV.F {}.w,{};" "TXL.F{} {},{},{},{},{}{};", - coord_vec, dref, sparse_mod, ret, coord_vec, lod, texture, type, offset_vec); + coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, + offset_vec); break; - case TextureType::ColorArrayCube: { - const ScopedRegister pair{ctx.reg_alloc}; + case TextureType::ColorArrayCube: ctx.Add("MOV.F {}.x,{};" "MOV.F {}.y,{};" "TXL.F{} {},{},{},{},{}{};", - pair.reg, dref, pair.reg, lod, sparse_mod, ret, coord_vec, pair.reg, texture, type, - offset_vec); + staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, + staging.reg, texture, type, offset_vec); break; - } default: throw NotImplementedException("Invalid type {}", info.type.Value()); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 0ce00db67..22d427817 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -533,11 +533,11 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - const IR::Value& coord, ScalarF32 dref, Register bias_lc, - const IR::Value& offset); + const IR::Value& coord, const IR::Value& dref, + const IR::Value& bias_lc, const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - const IR::Value& coord, ScalarF32 dref, ScalarF32 lod, - const IR::Value& 
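// [Editor's note; not part of the patch] The early ScopedRegister allocation above is
// the actual aliasing fix: reg_alloc.Consume() frees a source's register, so a staging
// register allocated later in the function could receive the same GLASM register as a
// still-referenced source operand. Reserving it before any source is consumed removes
// the hazard. In outline:
//   ScopedRegister staging{ctx.reg_alloc};                  // reserve first
//   const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};  // then consume sources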
offset); + const IR::Value& coord, const IR::Value& dref, + const IR::Value& lod, const IR::Value& offset); void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, From c5ca4fe451c398542f4f6c5e468e0bb96866175d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:53:51 -0300 Subject: [PATCH 430/770] renderer_opengl: State track assembly programs --- .../renderer_opengl/gl_graphics_program.cpp | 27 +++------- .../renderer_opengl/gl_graphics_program.h | 1 + .../renderer_opengl/gl_shader_manager.h | 51 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 4ac026502..b5d75aa13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -42,6 +42,9 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } u32 num_textures{}; u32 num_images{}; for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { @@ -182,6 +185,9 @@ void GraphicsProgram::Configure(bool is_indexed) { const std::span indices_span(image_view_indices.data(), image_view_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + ImageId* texture_buffer_index{image_view_ids.data()}; const auto bind_stage_info{[&](size_t stage) { size_t index{}; @@ -240,14 +246,8 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - // FIXME: Unhack this if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindProgram(program.handle); } @@ -300,19 +300,6 @@ void GraphicsProgram::Configure(bool is_indexed) { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - texture_cache.UpdateRenderTargets(false); - - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); - } else { - program_manager.BindProgram(program.handle); - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 58aa4b0bc..18292bb16 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ 
b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -89,6 +89,7 @@ private: OGLProgram program; std::array assembly_programs; + u32 enabled_stages_mask{}; std::array stage_infos{}; std::array base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 70781d6f5..48669b3cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,24 +4,69 @@ #pragma once +#include +#include + #include +#include "video_core/renderer_opengl/gl_resource_manager.h" + +#pragma optimize("", off) + namespace OpenGL { class ProgramManager { + static constexpr size_t NUM_STAGES = 5; + + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + public: void BindProgram(GLuint program) { - if (bound_program == program) { + if (current_source_program == program) { return; } - bound_program = program; + current_source_program = program; glUseProgram(program); } + void BindAssemblyPrograms(std::span programs, + u32 stage_mask) { + const u32 changed_mask = current_assembly_mask ^ stage_mask; + current_assembly_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_assembly_programs[stage] != programs[stage].handle) { + current_assembly_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void RestoreGuestCompute() {} private: - GLuint bound_program = 0; + GLuint current_source_program = 0; + + u32 current_assembly_mask = 0; + std::array current_assembly_programs; }; } // namespace OpenGL From 690b1841e6a1437335c0aae6d934f3fdcdb1680c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:59:05 -0300 Subject: [PATCH 431/770] renderer_opengl: State track compute assembly programs --- .../renderer_opengl/gl_compute_program.cpp | 5 +---- .../renderer_opengl/gl_shader_manager.h | 19 +++++++++++++++++++ .../renderer_opengl/renderer_opengl.cpp | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index fb54618a4..ce52a0052 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -125,10 +125,7 @@ void ComputeProgram::Configure() { texture_cache.FillComputeImageViews(indices_span, image_view_ids); if (assembly_program.handle != 0) { - // FIXME: State track this - glEnable(GL_COMPUTE_PROGRAM_NV); - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); - program_manager.BindProgram(0); + program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { program_manager.BindProgram(source_program.handle); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 48669b3cd..df7e1f644 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ 
b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -10,6 +10,7 @@
 #include
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_device.h"
 
 #pragma optimize("", off)
@@ -24,6 +25,12 @@ class ProgramManager {
     };
 
 public:
+    explicit ProgramManager(const Device& device) {
+        if (device.UseAssemblyShaders()) {
+            glEnable(GL_COMPUTE_PROGRAM_NV);
+        }
+    }
+
     void BindProgram(GLuint program) {
         if (current_source_program == program) {
             return;
@@ -32,6 +39,17 @@ public:
         glUseProgram(program);
     }
 
+    void BindComputeAssemblyProgram(GLuint program) {
+        if (current_compute_assembly_program != program) {
+            current_compute_assembly_program = program;
+            glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+        }
+        if (current_source_program != 0) {
+            current_source_program = 0;
+            glUseProgram(0);
+        }
+    }
+
     void BindAssemblyPrograms(std::span programs,
                               u32 stage_mask) {
         const u32 changed_mask = current_assembly_mask ^ stage_mask;
@@ -67,6 +85,7 @@ private:
 
     u32 current_assembly_mask = 0;
     std::array current_assembly_programs;
+    GLuint current_compute_assembly_program = 0;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4e77ef808..a4805f3da 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -130,6 +130,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
                                std::unique_ptr context_)
     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
+      program_manager{device},
       rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
         glEnable(GL_DEBUG_OUTPUT);
From fad139a3e6f8273acb5b14296ba8fcbd0946fe76 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Thu, 20 May 2021 17:27:39 -0300
Subject: [PATCH 432/770] glasm: Declare geometry program headers
---
 .../backend/glasm/emit_glasm.cpp | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 3910d00ee..b6b8d504e 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -319,6 +319,34 @@ std::string_view StageHeader(Stage stage) {
     }
     throw InvalidArgument("Invalid stage {}", stage);
 }
+
+std::string_view InputPrimitive(InputTopology topology) {
+    switch (topology) {
+    case InputTopology::Points:
+        return "POINTS";
+    case InputTopology::Lines:
+        return "LINES";
+    case InputTopology::LinesAdjacency:
+        return "LINES_ADJACENCY";
+    case InputTopology::Triangles:
+        return "TRIANGLES";
+    case InputTopology::TrianglesAdjacency:
+        return "TRIANGLES_ADJACENCY";
+    }
+    throw InvalidArgument("Invalid input topology {}", topology);
+}
+
+std::string_view OutputPrimitive(OutputTopology topology) {
+    switch (topology) {
+    case OutputTopology::PointList:
+        return "POINTS";
+    case OutputTopology::LineStrip:
+        return "LINE_STRIP";
+    case OutputTopology::TriangleStrip:
+        return "TRIANGLE_STRIP";
+    }
+    throw InvalidArgument("Invalid output topology {}", topology);
+}
 } // Anonymous namespace
 
 std::string EmitGLASM(const Profile& profile, IR::Program& program,
Bindings& bi std::string header{StageHeader(program.stage)}; SetupOptions(program, profile, header); switch (program.stage) { + case Stage::Geometry: + header += fmt::format("PRIMITIVE_IN {};" + "PRIMITIVE_OUT {};" + "VERTICES_OUT {};", + InputPrimitive(profile.input_topology), + OutputPrimitive(program.output_topology), program.output_vertices); + break; case Stage::Compute: header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); From 83cef0426b058014ad94dd687a29e7c31a1fbbef Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 17:28:09 -0300 Subject: [PATCH 433/770] glasm: Properly declare attributes on geometry programs --- src/shader_recompiler/backend/glasm/emit_context.cpp | 9 ++++++++- src/shader_recompiler/backend/glasm/emit_context.h | 1 + .../backend/glasm/emit_glasm_context_get_set.cpp | 10 +++++----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 8f418936e..a9bbb680f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -47,24 +47,31 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile case Stage::VertexA: case Stage::VertexB: stage_name = "vertex"; + attrib_name = "vertex"; break; case Stage::TessellationControl: case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; case Stage::Geometry: stage_name = "primitive"; + attrib_name = "vertex"; break; case Stage::Fragment: stage_name = "fragment"; + attrib_name = "fragment"; break; case Stage::Compute: stage_name = "invocation"; break; } + const std::string_view attr_stage{stage == Stage::Fragment ? 
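// [Editor's note; not part of the patch] With the declarations added in the previous
// commit, a geometry program consuming triangles and emitting a triangle strip of at
// most three vertices would begin roughly like:
//   !!NVgp5.0
//   PRIMITIVE_IN TRIANGLES;PRIMITIVE_OUT TRIANGLE_STRIP;VERTICES_OUT 3;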
"fragment" : "vertex"}; for (size_t index = 0; index < program.info.input_generics.size(); ++index) { const auto& generic{program.info.input_generics[index]}; if (generic.used) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, stage_name, index, index); + InterpDecorator(generic.interpolation), index, attr_stage, index, index); } } for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index d5bab48ea..e76ed1d7c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -66,6 +66,7 @@ public: Stage stage{}; std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; }; } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 6484387bc..a81bd209b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -64,20 +64,20 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.stage_name, swizzle); + ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: - ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.stage_name, swizzle); + ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); break; case IR::Attribute::InstanceId: - ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.stage_name); + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); break; case IR::Attribute::VertexId: - ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.stage_name); + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); break; case IR::Attribute::FrontFace: - ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.stage_name); + ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); break; default: throw NotImplementedException("Get attribute {}", attr); From 79929be8331fabdb83b5595704241f863a0ae33a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 19:21:38 -0300 Subject: [PATCH 434/770] glasm: Implement geometry shader attribute reads --- .../backend/glasm/emit_context.cpp | 3 +++ .../glasm/emit_glasm_context_get_set.cpp | 19 +++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index a9bbb680f..d8451b41f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -74,6 +74,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile InterpDecorator(generic.interpolation), index, attr_stage, index, index); } } + if (stage == Stage::Geometry && info.loads_position) { + Add("ATTRIB vertex_position=vertex.position;"); + } for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { if (!program.info.stores_frag_color[index]) { continue; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 
a81bd209b..d736775c8 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -19,6 +19,14 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
     const Register ret{ctx.reg_alloc.Define(inst)};
     ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
 }
+
+std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
+    if (ctx.stage == Stage::Geometry) {
+        return fmt::format("[{}]", vertex);
+    } else {
+        return "";
+    }
+}
 } // Anonymous namespace
 
 void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
@@ -50,13 +58,12 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
     GetCbuf(ctx, inst, binding, offset, "U32X2");
 }
 
-void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
-                      [[maybe_unused]] ScalarU32 vertex) {
+void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
     const u32 element{static_cast(attr) % 4};
     const char swizzle{"xyzw"[element]};
     if (IR::IsGeneric(attr)) {
         const u32 index{IR::GenericAttributeIndex(attr)};
-        ctx.Add("MOV.F {}.x,in_attr{}[0].{};", inst, index, swizzle);
+        ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
         return;
     }
     switch (attr) {
@@ -64,7 +71,11 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     case IR::Attribute::PositionX:
     case IR::Attribute::PositionY:
     case IR::Attribute::PositionZ:
     case IR::Attribute::PositionW:
-        ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
+        if (ctx.stage == Stage::Geometry) {
+            ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
+        } else {
+            ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
+        }
         break;
     case IR::Attribute::PointSpriteS:
     case IR::Attribute::PointSpriteT:
From 679e7146a701b961ac83b7f5f074ebd5a1bd7e45 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Thu, 20 May 2021 19:22:08 -0300
Subject: [PATCH 435/770] glasm: Optimize EmitVertex into EMIT
---
 .../backend/glasm/emit_glasm_not_implemented.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
index 9b962427c..82abbdab3 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
@@ -73,7 +73,11 @@ void EmitEpilogue(EmitContext& ctx) {
 }
 
 void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) {
-    ctx.Add("EMITS {};", stream);
+    if (stream.type == Type::U32 && stream.imm_u32 == 0) {
+        ctx.Add("EMIT;");
+    } else {
+        ctx.Add("EMITS {};", stream);
+    }
 }
 
 void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
From d5db96386d08652a8fd90757d9933835c06a84d6 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Thu, 20 May 2021 20:36:04 -0300
Subject: [PATCH 436/770] glasm: Implement InvocationId
---
 src/shader_recompiler/backend/glasm/emit_context.cpp | 3 +++
 .../backend/glasm/emit_glasm_not_implemented.cpp | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
index d8451b41f..7b25fa042 100644
--- a/src/shader_recompiler/backend/glasm/emit_context.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -77,6 +77,9 @@
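// [Editor's note; not part of the patch] EMIT always writes to vertex stream 0, which
// is why the optimization above only folds EMITS into the cheaper form when the stream
// is the immediate constant 0:
//   EMITS 0;    ->  EMIT;
//   EMITS R0.x;     (unchanged: stream only known at run time)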
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (stage == Stage::Geometry && info.loads_position) { Add("ATTRIB vertex_position=vertex.position;"); } + if (info.uses_invocation_id) { + Add("ATTRIB primitive_invocation=primitive.invocation;"); + } for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { if (!program.info.stores_frag_color[index]) { continue; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 82abbdab3..b60c907df 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -160,8 +160,8 @@ void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.S {},invocation.localid;", inst); } -void EmitInvocationId(EmitContext& ctx) { - NotImplemented(); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); } void EmitSampleId(EmitContext& ctx) { From 164b8c1ec5385339c8c29368cc02512aff4a6590 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 20:59:55 -0300 Subject: [PATCH 437/770] glasm: Fix InvocationId declaration --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 22d427817..b0af02235 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -68,7 +68,7 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); -void EmitInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); From a569ac418e232ed631b5a884a1d54aaa619d8341 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:00:17 -0300 Subject: [PATCH 438/770] glasm: Implement patch memory --- .../backend/glasm/emit_context.cpp | 18 +++++++++ .../glasm/emit_glasm_context_get_set.cpp | 37 ++++++++++++++++--- .../backend/glasm/emit_glasm_instructions.h | 2 +- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 7b25fa042..bb68b3d19 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -80,6 +80,24 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.uses_invocation_id) { Add("ATTRIB primitive_invocation=primitive.invocation;"); } + if (info.stores_tess_level_outer) { + Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); + } + if (info.stores_tess_level_inner) { + Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + if (stage == Stage::TessellationEval) { + Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};", index, index, + index); + } else { + Add("ATTRIB 
primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, + index, index); + } + } for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { if (!program.info.stores_frag_color[index]) { continue; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index d736775c8..c3a2c6b70 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -136,13 +136,40 @@ void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] throw NotImplementedException("GLASM instruction"); } -void EmitGetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch patch) { - throw NotImplementedException("GLASM instruction"); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("MOV.F {},result.patch.attrib[{}].{}", inst, index, "xyzw"[element]); } -void EmitSetPatch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Patch patch, - [[maybe_unused]] ScalarF32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("MOV.F result.patch.attrib[{}].{},{}", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } } void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index b0af02235..2eb1eb123 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -53,7 +53,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); void EmitGetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarU32 vertex); void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); -void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); From c0e4074721825e2af7be4f1a70408f5edb06597d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 
21:00:55 -0300 Subject: [PATCH 439/770] gl_shader_manager: Remove unintentionally committed #pragma --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index df7e1f644..c922bcf82 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,8 +12,6 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" -#pragma optimize("", off) - namespace OpenGL { class ProgramManager { From 54decced922aaa73f4c30d696679f3602c930204 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:01:41 -0300 Subject: [PATCH 440/770] gl_shader_manager: Zero initialize current assembly programs --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index c922bcf82..5ec57d707 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -82,7 +82,7 @@ private: GLuint current_source_program = 0; u32 current_assembly_mask = 0; - std::array current_assembly_programs; + std::array current_assembly_programs{}; GLuint current_compute_assembly_program = 0; }; From 2913ca811ea95527d0a0ae653348e3bdc90407fd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:06:50 -0300 Subject: [PATCH 441/770] glasm: Implement TessellationEvaluationPoint --- .../backend/glasm/emit_glasm_context_get_set.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index c3a2c6b70..a728225b0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -77,6 +77,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); } break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.Add("MOV.F {}.x,vertex.tesscoord.{}", inst, swizzle); + break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); From 9ec2303ad6a399cea9e66fa522f65671046f1879 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:18:39 -0300 Subject: [PATCH 442/770] glasm: Add tessellation shader declarations --- .../backend/glasm/emit_glasm.cpp | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index b6b8d504e..476cdda54 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -347,6 +347,30 @@ std::string_view OutputPrimitive(OutputTopology topology) { } throw InvalidArgument("Invalid output topology {}", topology); } + +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "TRIANGLES"; + case TessPrimitive::Quads: + return "QUADS"; + case TessPrimitive::Isolines: + return "ISOLINES"; + } + throw 
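// [Editor's note; not part of the patch] Together with GetTessSpacing below, these
// helpers complete the tessellation evaluation header. A quad-domain shader with
// fractional-even spacing and clockwise winding would be declared roughly as:
//   !!NVtep5.0
//   TESS_MODE QUADS;TESS_SPACING FRACTIONAL_EVEN;TESS_VERTEX_ORDER CW;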
InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "EQUAL"; + case TessSpacing::FractionalOdd: + return "FRACTIONAL_ODD"; + case TessSpacing::FractionalEven: + return "FRACTIONAL_EVEN"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} } // Anonymous namespace std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bindings) { @@ -356,6 +380,17 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi std::string header{StageHeader(program.stage)}; SetupOptions(program, profile, header); switch (program.stage) { + case Stage::TessellationControl: + header += fmt::format("VERTICES_OUT {};", program.invocations); + break; + case Stage::TessellationEval: + header += + fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(profile.tess_primitive), GetTessSpacing(profile.tess_spacing), + profile.tess_clockwise ? "CW" : "CCW"); + break; case Stage::Geometry: header += fmt::format("PRIMITIVE_IN {};" "PRIMITIVE_OUT {};" From 48d4e263264e9ae0214ad6f0064e8e32aba17fc4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:39:45 -0300 Subject: [PATCH 443/770] glasm: Fix tessellation headers --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 476cdda54..4fc7d2f2f 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -307,9 +307,9 @@ std::string_view StageHeader(Stage stage) { case Stage::VertexB: return "!!NVvp5.0\n"; case Stage::TessellationControl: - return "!!NVtcs5.0\n"; + return "!!NVtcp5.0\n"; case Stage::TessellationEval: - return "!!NVtes5.0\n"; + return "!!NVtep5.0\n"; case Stage::Geometry: return "!!NVgp5.0\n"; case Stage::Fragment: From 0d7d85c81eb3bb931916ec5b85bcb42761a00522 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:40:49 -0300 Subject: [PATCH 444/770] glasm: Add missing semicolon on tesscoord reading --- .../backend/glasm/emit_glasm_context_get_set.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index a728225b0..cdba4fa89 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -79,7 +79,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal break; case IR::Attribute::TessellationEvaluationPointU: case IR::Attribute::TessellationEvaluationPointV: - ctx.Add("MOV.F {}.x,vertex.tesscoord.{}", inst, swizzle); + ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: From a5d978e91ec66d21764bb74047881cb614425935 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:48:09 -0300 Subject: [PATCH 445/770] glasm: Fix tessellation input attributes --- .../backend/glasm/emit_glasm_context_get_set.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp 
b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index cdba4fa89..7a084508d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -21,9 +21,12 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU } std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return fmt::format("[{}]", vertex); - } else { + default: return ""; } } From 394b96a2fe0fb7c93052f043383889e46ff6202a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:54:02 -0300 Subject: [PATCH 446/770] glasm: Implement clip distance stores --- src/shader_recompiler/backend/glasm/emit_context.cpp | 3 +++ .../backend/glasm/emit_glasm_context_get_set.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index bb68b3d19..e82c8995a 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -86,6 +86,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } + if (info.stores_clip_distance) { + Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); + } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { continue; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 7a084508d..b64025420 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -121,6 +121,18 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, case IR::Attribute::PositionW: ctx.Add("MOV.F result.position.{},{};", swizzle, value); break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast(attr) - static_cast(IR::Attribute::ClipDistance0)}; + ctx.Add("MOV.F result.clip[{}].x,{};", index, value); + break; + } case IR::Attribute::ViewportIndex: if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.viewport.x,{};", value); From 3da7b98d376cc0b8ec00de80755d9e90fc90e3a8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:56:40 -0300 Subject: [PATCH 447/770] glasm: Implement PrimitiveId attribute read --- .../backend/glasm/emit_glasm_context_get_set.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index b64025420..116b42361 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -70,6 +70,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal return; 
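// [Editor's note; not part of the patch] The clip-distance support added in the
// previous commit maps IR::Attribute::ClipDistance0..7 onto the result_clip[] array
// declared in emit_context.cpp, one scalar per array element. Storing clip distance 3
// from R0.x therefore emits:
//   MOV.F result.clip[3].x,R0.x;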
} switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.Add("MOV.S {}.x,primitive.id;", inst); + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: From 36d040da7059e438fa35f1a5de5d5aed4cef5ca4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 01:00:51 -0400 Subject: [PATCH 448/770] glasm: Implement FSWZADD --- .../backend/glasm/emit_glasm.cpp | 16 +++++++++++++++- .../backend/glasm/emit_glasm_instructions.h | 3 ++- .../backend/glasm/emit_glasm_warp.cpp | 13 +++++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4fc7d2f2f..f110fd7f8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -281,7 +281,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "OPTION NV_shader_atomic_fp16_vector;"; } - if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) { + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { header += "OPTION NV_shader_thread_group;"; } if (info.uses_subgroup_shuffles) { @@ -416,12 +417,25 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi if (program.local_memory_size > 0) { header += fmt::format("lmem[{}],", program.local_memory_size); } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } header += "RC;" "LONG TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { header += fmt::format("D{},", index); } header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } ctx.code.insert(0, header); ctx.code += "END"; return ctx.code; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 2eb1eb123..15380a955 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -616,7 +616,8 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU3 const IR::Value& clamp, const IR::Value& segmentation_mask); void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, const IR::Value& clamp, const IR::Value& segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle); void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 0f987daeb..af0e13d43 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -95,8 +95,17 @@ void 
EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, Sca Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); } -void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) { - throw NotImplementedException("GLASM instruction"); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, + ScalarU32 swizzle) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("AND.U RC.z,{}.threadid,3;" + "SHL.U RC.z,RC.z,1;" + "SHR.U RC.z,{},RC.z;" + "AND.U RC.z,RC.z,3;" + "MUL.F RC.x,{},FSWZA[RC.z];" + "MUL.F RC.y,{},FSWZB[RC.z];" + "ADD.F {}.x,RC.x,RC.y;", + ctx.stage_name, swizzle, op_a, op_b, ret); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { From 781a87175ce4b28d0048433a78567551c5f0815d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 22:08:56 -0300 Subject: [PATCH 449/770] glasm: Fix patch attribute declarations --- src/shader_recompiler/backend/glasm/emit_context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e82c8995a..e42f186c1 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -93,7 +93,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (!info.uses_patches[index]) { continue; } - if (stage == Stage::TessellationEval) { + if (stage == Stage::TessellationControl) { Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};", index, index, index); } else { From eb156679057340524c3e1e00d4910c88a2f3c6c2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 20 May 2021 22:59:46 -0400 Subject: [PATCH 450/770] emit_glasm_context_get_and_set.cpp: Add missing semicolons --- .../backend/glasm/emit_glasm_context_get_set.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 116b42361..34669160a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -164,14 +164,14 @@ void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { } const u32 index{IR::GenericPatchIndex(patch)}; const u32 element{IR::GenericPatchElement(patch)}; - ctx.Add("MOV.F {},result.patch.attrib[{}].{}", inst, index, "xyzw"[element]); + ctx.Add("MOV.F {},result.patch.attrib[{}].{};", inst, index, "xyzw"[element]); } void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { if (IR::IsGeneric(patch)) { const u32 index{IR::GenericPatchIndex(patch)}; const u32 element{IR::GenericPatchElement(patch)}; - ctx.Add("MOV.F result.patch.attrib[{}].{},{}", index, "xyzw"[element], value); + ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); return; } switch (patch) { From 9e7b6622c25aa858b96bf0f1c7f94223a2f449a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:12:32 -0300 Subject: [PATCH 451/770] shader: Split profile and runtime information in separate structs --- .../backend/glasm/emit_context.cpp | 40 +- .../backend/glasm/emit_context.h | 6 +- .../backend/glasm/emit_glasm.cpp | 19 +- .../backend/glasm/emit_glasm.h | 6 +- .../backend/spirv/emit_context.cpp | 26 +- .../backend/spirv/emit_context.h | 4 +- 
.../backend/spirv/emit_spirv.cpp | 20 +- .../backend/spirv/emit_spirv.h | 6 +- .../spirv/emit_spirv_context_get_set.cpp | 4 +- .../backend/spirv/emit_spirv_special.cpp | 15 +- src/shader_recompiler/profile.h | 13 +- .../renderer_opengl/gl_shader_cache.cpp | 26 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 418 +++++++++--------- .../renderer_vulkan/vk_pipeline_cache.h | 5 +- 14 files changed, 300 insertions(+), 308 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e42f186c1..659ff6d17 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -23,23 +23,25 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) - : info{program.info}, profile{profile_} { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : profile{profile_}, runtime_info{runtime_info_} { // FIXME: Temporary partial implementation + const auto& info{program.info}; u32 cbuf_index{}; - for (const auto& desc : program.info.constant_buffer_descriptors) { + for (const auto& desc : info.constant_buffer_descriptors) { if (desc.count != 1) { throw NotImplementedException("Constant buffer descriptor array"); } Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } - for (const auto& desc : program.info.storage_buffers_descriptors) { + for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage buffer descriptor array"); } } - if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); } stage = program.stage; @@ -67,8 +69,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - const auto& generic{program.info.input_generics[index]}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; if (generic.used) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(generic.interpolation), index, attr_stage, index, index); @@ -101,8 +103,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { - if (!program.info.stores_frag_color[index]) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { continue; } if (index == 0) { @@ -111,28 +113,28 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < program.info.stores_generics.size(); ++index) { - if (program.info.stores_generics[index]) { + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } - image_buffer_bindings.reserve(program.info.image_buffer_descriptors.size()); - for (const auto& desc : program.info.image_buffer_descriptors) { + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { image_buffer_bindings.push_back(bindings.image); bindings.image += desc.count; } - image_bindings.reserve(program.info.image_descriptors.size()); - for (const auto& desc : program.info.image_descriptors) { + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { image_bindings.push_back(bindings.image); bindings.image += desc.count; } - texture_buffer_bindings.reserve(program.info.texture_buffer_descriptors.size()); - for (const auto& desc : program.info.texture_buffer_descriptors) { + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { texture_buffer_bindings.push_back(bindings.texture); bindings.texture += desc.count; } - texture_bindings.reserve(program.info.texture_descriptors.size()); - for (const auto& desc : program.info.texture_descriptors) { + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { texture_bindings.push_back(bindings.texture); bindings.texture += desc.count; } diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index e76ed1d7c..1f057fdd5 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -16,6 +16,7 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Backend { @@ -31,7 +32,8 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); template void Add(const char* format_str, IR::Inst& inst, Args&&... 
args) { @@ -56,8 +58,8 @@ public: std::string code; RegAlloc reg_alloc{*this}; - const Info& info; const Profile& profile; + const RuntimeInfo& runtime_info; std::vector texture_buffer_bindings; std::vector image_buffer_bindings; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index f110fd7f8..edff04a44 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -374,8 +374,9 @@ std::string_view GetTessSpacing(TessSpacing spacing) { } } // Anonymous namespace -std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bindings) { - EmitContext ctx{program, bindings, profile}; +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; @@ -385,18 +386,18 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi header += fmt::format("VERTICES_OUT {};", program.invocations); break; case Stage::TessellationEval: - header += - fmt::format("TESS_MODE {};" - "TESS_SPACING {};" - "TESS_VERTEX_ORDER {};", - GetTessMode(profile.tess_primitive), GetTessSpacing(profile.tess_spacing), - profile.tess_clockwise ? "CW" : "CCW"); + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "CW" : "CCW"); break; case Stage::Geometry: header += fmt::format("PRIMITIVE_IN {};" "PRIMITIVE_OUT {};" "VERTICES_OUT {};", - InputPrimitive(profile.input_topology), + InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; case Stage::Compute: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index a0dfdd818..3d02d873e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -12,12 +12,12 @@ namespace Shader::Backend::GLASM { -[[nodiscard]] std::string EmitGLASM(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitGLASM(profile, program, binding); + return EmitGLASM(profile, {}, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a98e08392..3e8899f53 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -136,7 +136,7 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, break; case Stage::Geometry: if (per_invocation) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; type = ctx.TypeArray(type, ctx.Const(num_vertices)); } break; @@ -161,8 +161,8 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo while (element < 4) { const u32 remainder{4 - element}; const TransformFeedbackVarying* 
xfb_varying{}; - if (!ctx.profile.xfb_varyings.empty()) { - xfb_varying = &ctx.profile.xfb_varyings[base_attr_index + element]; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; } const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; @@ -208,7 +208,7 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { } std::optional AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -441,13 +441,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& binding) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { const bool is_unified{profile.unified_descriptor_binding}; - u32& uniform_binding{is_unified ? binding.unified : binding.uniform_buffer}; - u32& storage_binding{is_unified ? binding.unified : binding.storage_buffer}; - u32& texture_binding{is_unified ? binding.unified : binding.texture}; - u32& image_binding{is_unified ? binding.unified : binding.image}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? 
bindings.unified : bindings.image}; AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); @@ -1211,7 +1213,7 @@ void EmitContext::DefineInputs(const Info& info) { if (!generic.used) { continue; } - const AttributeType input_type{profile.generic_input_types[index]}; + const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1256,7 +1258,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || profile.fixed_state_point_size) { + if (info.stores_point_size || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d2b79f6c1..961c9180c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,7 +103,8 @@ struct GenericElementInfo { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -150,6 +151,7 @@ public: } const Profile& profile; + const RuntimeInfo& runtime_info; Stage stage{}; Id void_id{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3e20ac3b9..cba420cda 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -226,16 +226,17 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Stage::TessellationEval: execution_model = spv::ExecutionModel::TessellationEvaluation; ctx.AddCapability(spv::Capability::Tessellation); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); - ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? 
spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::GeometryStreams); - switch (ctx.profile.input_topology) { + switch (ctx.runtime_info.input_topology) { case InputTopology::Points: ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); break; @@ -279,7 +280,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } - if (ctx.profile.force_early_z) { + if (ctx.runtime_info.force_early_z) { ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); } break; @@ -402,7 +403,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } - if (!ctx.profile.xfb_varyings.empty()) { + if (!ctx.runtime_info.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } if (info.uses_derivatives) { @@ -433,8 +434,9 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding) { - EmitContext ctx{profile, program, binding}; +std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (profile.support_float_controls) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index d8ab2d8ed..db0c935fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,12 +16,12 @@ namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitSPIRV(profile, program, binding); + return EmitSPIRV(profile, {}, program, binding); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 8e57ff070..c1b69c234 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -17,7 +17,7 @@ struct AttrInfo { }; std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -468,7 +468,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - return ctx.Const(ctx.profile.y_negate ? -1.0f : 1.0f); + return ctx.Const(ctx.runtime_info.y_negate ?
-1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index ba948f3c9..072a3b1bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -18,8 +18,8 @@ void ConvertDepthMode(EmitContext& ctx) { } void SetFixedPipelinePointSize(EmitContext& ctx) { - if (ctx.profile.fixed_state_point_size) { - const float point_size{*ctx.profile.fixed_state_point_size}; + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); } } @@ -62,7 +62,10 @@ Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1 } void AlphaTest(EmitContext& ctx) { - const auto comparison{*ctx.profile.alpha_test_func}; + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; if (comparison == CompareFunction::Always) { return; } @@ -76,7 +79,7 @@ void AlphaTest(EmitContext& ctx) { const Id true_label{ctx.OpLabel()}; const Id discard_label{ctx.OpLabel()}; - const Id alpha_reference{ctx.Const(ctx.profile.alpha_test_reference)}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); @@ -113,7 +116,7 @@ void EmitPrologue(EmitContext& ctx) { } void EmitEpilogue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (ctx.stage == Stage::Fragment) { @@ -122,7 +125,7 @@ void EmitEpilogue(EmitContext& ctx) { } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - if (ctx.profile.convert_depth_mode) { + if (ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (stream.IsImmediate()) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 12699511a..c46452c3d 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -81,19 +81,22 @@ struct Profile { bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; - bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; + + bool warp_size_potentially_larger_than_guest{}; bool lower_left_origin_mode{}; - // FClamp is broken and OpFMax + OpFMin should be used instead + /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; - // Offset image operands with an unsigned type do not work + /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; - // Signed instructions with unsigned data types are misinterpreted + /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; - // Ignores SPIR-V ordered vs unordered using GLSL semantics + /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; +}; +struct RuntimeInfo { std::array generic_input_types{}; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b84b36b9d..d7efbdd01 100644 
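// NOTE: with this split, Profile describes per-device capabilities while
// RuntimeInfo describes per-pipeline state. A hedged usage sketch built from
// the signatures above (field values are illustrative only):
//
//     Shader::RuntimeInfo runtime_info{};
//     runtime_info.input_topology = Shader::InputTopology::Triangles;
//     Shader::Backend::Bindings bindings{};
//     const std::vector<u32> spirv{
//         Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, bindings)};
//
// Callers without pipeline state (e.g. compute) keep the two-argument overload,
// which forwards a default-constructed RuntimeInfo.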
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,33 +61,15 @@ const Shader::Profile profile{ .support_viewport_mask = true, .support_typeless_image_loads = true, .support_demote_to_helper_invocation = false, - .warp_size_potentially_larger_than_guest = true, .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, .lower_left_origin_mode = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, .ignore_nan_fp_comparisons = true, - - .generic_input_types = {}, - .convert_depth_mode = false, - .force_early_z = false, - - .tess_primitive = {}, - .tess_spacing = {}, - .tess_clockwise = false, - - .input_topology = Shader::InputTopology::Triangles, - - .fixed_state_point_size = std::nullopt, - - .alpha_test_func = Shader::CompareFunction::Always, - .alpha_test_reference = 0.0f, - - .y_negate = false, - - .xfb_varyings = {}, }; using Shader::Backend::GLASM::EmitGLASM; @@ -302,10 +284,10 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program, binding)}; + const std::string code{EmitGLASM(profile, {}, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, {}, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7830c0194..88db10b75 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -89,6 +89,208 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } + +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 
148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program) { + Shader::RuntimeInfo info; + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... 
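// NOTE: "clockwise" here is intentionally inverted; the register value and the
// winding the host API expects disagree, so the mapping below is:
//
//     key.state.tessellation_clockwise == 0  ->  tess_clockwise = true   (CW)
//     key.state.tessellation_clockwise != 0  ->  tess_clockwise = false  (CCW)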
+ info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast<Maxwell::TessellationPrimitive>(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast<Maxwell::TessellationSpacing>(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; +} } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -124,7 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - base_profile = Shader::Profile{ + profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ?
0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, @@ -153,14 +355,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, - .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, - .generic_input_types{}, - .fixed_state_point_size{}, - .alpha_test_func{}, - .xfb_varyings{}, }; } @@ -329,8 +527,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program)}; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -391,7 +589,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(base_profile, program)}; + const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { @@ -403,206 +601,4 @@ std::unique_ptr PipelineCache::CreateComputePipeline( thread_worker, program.info, std::move(spv_module)); } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { - if (attr.enabled == 0) { - return Shader::AttributeType::Disabled; - } - switch (attr.Type()) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return Shader::AttributeType::Float; - case Maxwell::VertexAttribute::Type::SignedInt: - return Shader::AttributeType::SignedInt; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Float; -} - -static std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor 
- 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - -Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { - Shader::Profile profile{base_profile}; - - const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; - const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; - const float point_size{Common::BitCast(key.state.point_size)}; - switch (stage) { - case Shader::Stage::VertexB: - if (!has_geometry) { - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - } - std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), - &CastAttributeType); - break; - case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... 
- profile.tess_clockwise = key.state.tessellation_clockwise == 0; - profile.tess_primitive = [&key] { - const u32 raw{key.state.tessellation_primitive.Value()}; - switch (static_cast(raw)) { - case Maxwell::TessellationPrimitive::Isolines: - return Shader::TessPrimitive::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return Shader::TessPrimitive::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return Shader::TessPrimitive::Quads; - } - UNREACHABLE(); - return Shader::TessPrimitive::Triangles; - }(); - profile.tess_spacing = [&] { - const u32 raw{key.state.tessellation_spacing}; - switch (static_cast(raw)) { - case Maxwell::TessellationSpacing::Equal: - return Shader::TessSpacing::Equal; - case Maxwell::TessellationSpacing::FractionalOdd: - return Shader::TessSpacing::FractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return Shader::TessSpacing::FractionalEven; - } - UNREACHABLE(); - return Shader::TessSpacing::Equal; - }(); - break; - case Shader::Stage::Geometry: - if (program.output_topology == Shader::OutputTopology::PointList) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - break; - case Shader::Stage::Fragment: - profile.alpha_test_func = MaxwellToCompareFunction( - key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); - profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); - break; - default: - break; - } - switch (key.state.topology) { - case Maxwell::PrimitiveTopology::Points: - profile.input_topology = Shader::InputTopology::Points; - break; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - profile.input_topology = Shader::InputTopology::Lines; - break; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - case Maxwell::PrimitiveTopology::Quads: - case Maxwell::PrimitiveTopology::QuadStrip: - case Maxwell::PrimitiveTopology::Polygon: - case Maxwell::PrimitiveTopology::Patches: - profile.input_topology = Shader::InputTopology::Triangles; - break; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - profile.input_topology = Shader::InputTopology::LinesAdjacency; - break; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - profile.input_topology = Shader::InputTopology::TrianglesAdjacency; - break; - } - profile.force_early_z = key.state.early_z != 0; - profile.y_negate = key.state.y_negate != 0; - return profile; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4e48b4956..4116cc73f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -129,9 +129,6 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program); - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -148,7 +145,7 @@ private: ShaderPools main_pools; - Shader::Profile base_profile; + Shader::Profile profile; std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; From 
c07cc9d6a560d14e25ec59974ae5a15a7842d779 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:57:52 -0300 Subject: [PATCH 452/770] gl_shader_cache: Pass shader runtime information --- .../renderer_opengl/gl_shader_cache.cpp | 76 ++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d7efbdd01..b4f26dd74 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -184,6 +184,76 @@ GLenum AssemblyStage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, + const Shader::IR::Program& program) { + UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); + + Shader::RuntimeInfo info; + switch (program.stage) { + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + return info; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -283,11 +353,13 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; + + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; if (device.UseAssemblyShaders()) { - const std::string 
code{EmitGLASM(profile, {}, program, binding)}; + const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, {}, program, binding)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } From b382f57b286f72f89cff4e710ba6e6b34d22035d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:56:59 -0300 Subject: [PATCH 453/770] glasm: Fix output patch reads With this, Luigi's Mansion's sand renders properly. --- .../backend/glasm/emit_context.cpp | 12 +++++++--- .../glasm/emit_glasm_context_get_set.cpp | 23 +++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 659ff6d17..0f7d79843 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -21,6 +21,11 @@ std::string_view InterpDecorator(Interpolation interp) { } throw InvalidArgument("Invalid interpolation {}", interp); } + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} } // Anonymous namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -76,7 +81,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile InterpDecorator(generic.interpolation), index, attr_stage, index, index); } } - if (stage == Stage::Geometry && info.loads_position) { + if (IsInputArray(stage) && info.loads_position) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -96,8 +101,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile continue; } if (stage == Stage::TessellationControl) { - Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};", index, index, - index); + Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" + "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", + index, index, index, index, index, index); } else { Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, index, index); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 34669160a..97b0e7409 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -20,15 +20,13 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); } +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { - switch (ctx.stage) { - case Stage::TessellationControl: - case Stage::TessellationEval: - case Stage::Geometry: - return fmt::format("[{}]", vertex); - default: - return ""; - } + return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; } } // Anonymous namespace @@ -77,7 +75,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - if (ctx.stage == Stage::Geometry) { + if (IsInputArray(ctx.stage)) { ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); } else { ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); @@ -164,7 +162,12 @@ void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { } const u32 index{IR::GenericPatchIndex(patch)}; const u32 element{IR::GenericPatchElement(patch)}; - ctx.Add("MOV.F {},result.patch.attrib[{}].{};", inst, index, "xyzw"[element]); + const char swizzle{"xyzw"[element]}; + if (ctx.stage == Stage::TessellationControl) { + ctx.Add("MOV.F {},primitive.out.patch.attrib[{}].{};", inst, index, swizzle); + } else { + ctx.Add("MOV.F {},primitive.patch.attrib[{}].{};", inst, index, swizzle); + } } void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { From 7dadb2bef3bd7f1038c61cb67c4248e1f7f324df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 03:02:06 -0300 Subject: [PATCH 454/770] glasm: Simplify patch reads --- .../backend/glasm/emit_glasm_context_get_set.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 97b0e7409..14fdc88f6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -163,11 +163,8 @@ void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { const u32 index{IR::GenericPatchIndex(patch)}; const u32 element{IR::GenericPatchElement(patch)}; const char swizzle{"xyzw"[element]}; - if (ctx.stage == Stage::TessellationControl) { - ctx.Add("MOV.F {},primitive.out.patch.attrib[{}].{};", inst, index, swizzle); - } else { - ctx.Add("MOV.F {},primitive.patch.attrib[{}].{};", inst, index, swizzle); - } + const std::string_view out{ctx.stage == Stage::TessellationControl ? 
".out" : ""}; + ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); } void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { From 69b910e9e7c2b9c361f4389cb1d136105b991bc0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 17:19:35 -0300 Subject: [PATCH 455/770] video_core: Abstract transform feedback translation utility --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/fixed_pipeline_state.cpp | 25 +++-- .../renderer_vulkan/fixed_pipeline_state.h | 15 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 86 +--------------- src/video_core/transform_feedback.cpp | 98 +++++++++++++++++++ src/video_core/transform_feedback.h | 30 ++++++ 6 files changed, 145 insertions(+), 111 deletions(-) create mode 100644 src/video_core/transform_feedback.cpp create mode 100644 src/video_core/transform_feedback.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b008c37c0..8250f736c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -187,6 +187,8 @@ add_library(video_core STATIC textures/decoders.h textures/texture.cpp textures/texture.h + transform_feedback.cpp + transform_feedback.h video_core.cpp video_core.h vulkan_common/vulkan_debug_callback.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 24834e0f7..3a43c329f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,6 +37,16 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, @@ -121,7 +129,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, dynamic_state.Refresh(regs); } if (xfb_enabled != 0) { - xfb_state.Refresh(regs); + RefreshXfbState(xfb_state, regs); } } @@ -164,17 +172,6 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } -void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { - std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { - return Layout{ - .stream = layout.stream, - .varying_count = layout.varying_count, - .stride = layout.stride, - }; - }); - varyings = regs.tfb_varying_locs; -} - void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31de6b2c8..0f1eff9cd 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" 
+#include "video_core/transform_feedback.h" namespace Vulkan { @@ -130,18 +131,6 @@ struct FixedPipelineState { } }; - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - - void Refresh(const Maxwell& regs); - }; - struct DynamicState { union { u32 raw1; @@ -213,7 +202,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 88db10b75..f86bf9c30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,88 +109,6 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } -std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -206,7 +124,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, 
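// The two call-site hunks that follow are mechanical: both switch from the backend-local
// helper removed above to the shared VideoCommon::MakeTransformFeedbackVaryings,
// forwarding only the captured key.state.xfb_state instead of the whole pipeline key.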
         info.fixed_state_point_size = point_size;
     }
     if (key.state.xfb_enabled != 0) {
-        info.xfb_varyings = MakeTransformFeedbackVaryings(key);
+        info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
     }
     info.convert_depth_mode = gl_ndc;
 }
@@ -248,7 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key,
         info.fixed_state_point_size = point_size;
     }
     if (key.state.xfb_enabled != 0) {
-        info.xfb_varyings = MakeTransformFeedbackVaryings(key);
+        info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
     }
     info.convert_depth_mode = gl_ndc;
     break;
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..db52fff93
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,98 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/transform_feedback.h"
+
+namespace VideoCommon {
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+    const TransformFeedbackState& state) {
+    static constexpr std::array VECTORS{
+        28,  // gl_Position
+        32,  // Generic 0
+        36,  // Generic 1
+        40,  // Generic 2
+        44,  // Generic 3
+        48,  // Generic 4
+        52,  // Generic 5
+        56,  // Generic 6
+        60,  // Generic 7
+        64,  // Generic 8
+        68,  // Generic 9
+        72,  // Generic 10
+        76,  // Generic 11
+        80,  // Generic 12
+        84,  // Generic 13
+        88,  // Generic 14
+        92,  // Generic 15
+        96,  // Generic 16
+        100, // Generic 17
+        104, // Generic 18
+        108, // Generic 19
+        112, // Generic 20
+        116, // Generic 21
+        120, // Generic 22
+        124, // Generic 23
+        128, // Generic 24
+        132, // Generic 25
+        136, // Generic 26
+        140, // Generic 27
+        144, // Generic 28
+        148, // Generic 29
+        152, // Generic 30
+        156, // Generic 31
+        160, // gl_FrontColor
+        164, // gl_FrontSecondaryColor
+        160, // gl_BackColor
+        164, // gl_BackSecondaryColor
+        192, // gl_TexCoord[0]
+        196, // gl_TexCoord[1]
+        200, // gl_TexCoord[2]
+        204, // gl_TexCoord[3]
+        208, // gl_TexCoord[4]
+        212, // gl_TexCoord[5]
+        216, // gl_TexCoord[6]
+        220, // gl_TexCoord[7]
+    };
+    std::vector<Shader::TransformFeedbackVarying> xfb(256);
+    for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
+        const auto& locations = state.varyings[buffer];
+        const auto& layout = state.layouts[buffer];
+        const u32 varying_count = layout.varying_count;
+        u32 highest = 0;
+        for (u32 offset = 0; offset < varying_count; ++offset) {
+            const u32 base_offset = offset;
+            const u8 location = locations[offset];
+
+            Shader::TransformFeedbackVarying varying{
+                .buffer = layout.stream,
+                .stride = layout.stride,
+                .offset = offset * 4,
+                .components = 1,
+            };
+            if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
+                UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+                const u8 base_index = location / 4;
+                while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+                    ++offset;
+                    ++varying.components;
+                }
+            }
+            xfb[location] = varying;
+            highest = std::max(highest, (base_offset + varying.components) * 4);
+        }
+        UNIMPLEMENTED_IF(highest != layout.stride);
+    }
+    return xfb;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..6832c6db1
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_types.h"
+#include "shader_recompiler/profile.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace VideoCommon {
+
+struct TransformFeedbackState {
+    struct Layout {
+        u32 stream;
+        u32 varying_count;
+        u32 stride;
+    };
+    std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts;
+    std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+        varyings;
+};
+
+std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+    const TransformFeedbackState& state);
+
+} // namespace VideoCommon

From 6bc54e12a0d274beee0cb7584f73429112ec98b2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Fri, 21 May 2021 18:17:53 -0300
Subject: [PATCH 456/770] glasm: Set transform feedback state

---
 .../renderer_opengl/gl_graphics_program.cpp | 90 ++++++++++++++++-
 .../renderer_opengl/gl_graphics_program.h   | 32 +++---
 .../renderer_opengl/gl_rasterizer.cpp       | 98 ++-----------------
 .../renderer_opengl/gl_rasterizer.h         |  6 +-
 .../renderer_opengl/gl_shader_cache.cpp     | 19 +++-
 5 files changed, 132 insertions(+), 113 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp
index b5d75aa13..9677a3ed6 100644
--- a/src/video_core/renderer_opengl/gl_graphics_program.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp
@@ -12,7 +12,7 @@
 #include "video_core/texture_cache/texture_cache.h"
 
 namespace OpenGL {
-
+namespace {
 using Shader::ImageBufferDescriptor;
 using Tegra::Texture::TexturePair;
 using VideoCommon::ImageId;
@@ -20,6 +20,35 @@
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 8;
 
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+    const u8 index = location / 4;
+    if (index >= 8 && index <= 39) {
+        return {GL_GENERIC_ATTRIB_NV, index - 8};
+    }
+    if (index >= 48 && index <= 55) {
+        return {GL_TEXTURE_COORD_NV, index - 48};
+    }
+    switch (index) {
+    case 7:
+        return {GL_POSITION, 0};
+    case 40:
+        return {GL_PRIMARY_COLOR_NV, 0};
+    case 41:
+        return {GL_SECONDARY_COLOR_NV, 0};
+    case 42:
+        return {GL_BACK_PRIMARY_COLOR_NV, 0};
+    case 43:
+        return {GL_BACK_SECONDARY_COLOR_NV, 0};
+    }
+    UNIMPLEMENTED_MSG("index={}", index);
+    return {GL_POSITION, 0};
+}
+} // Anonymous namespace
+
 size_t GraphicsProgramKey::Hash() const noexcept {
     return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
 }
@@ -34,7 +63,8 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff
                                  ProgramManager& program_manager_, StateTracker& state_tracker_,
                                  OGLProgram program_,
                                  std::array<OGLAssemblyProgram, 5> assembly_programs_,
-                                 const std::array<const Shader::Info*, 5>& infos)
+                                 const std::array<const Shader::Info*, 5>& infos,
+                                 const VideoCommon::TransformFeedbackState* xfb_state)
     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
       maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_},
       program{std::move(program_)}, assembly_programs{std::move(assembly_programs_)} {
@@ -74,6 +104,10 @@
     }
     ASSERT(num_textures <= MAX_TEXTURES);
     ASSERT(num_images <= MAX_IMAGES);
+
+    if (assembly_programs[0].handle != 0 && xfb_state) {
+
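+        // Build the NV_transform_feedback3 attrib/stream token arrays once per program
+        // (assembly shaders only); ConfigureTransformFeedback() later replays them with
+        // a single glTransformFeedbackStreamAttribsNV call at draw time.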
GenerateTransformFeedbackState(*xfb_state); + } } struct Spec { @@ -302,4 +336,56 @@ void GraphicsProgram::Configure(bool is_indexed) { } } +void GraphicsProgram::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsProgram::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 18292bb16..53a57ede5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -16,6 +16,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" namespace OpenGL { @@ -24,16 +25,6 @@ class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - std::array unique_hashes; union { u32 raw; @@ -45,7 +36,7 @@ struct GraphicsProgramKey { BitField<10, 1, u32> tessellation_clockwise; }; std::array padding; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; size_t Hash() const noexcept; @@ -75,11 +66,22 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos); + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); void Configure(bool is_indexed); + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& 
xfb_state); + + void ConfigureTransformFeedbackImpl() const; + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -96,6 +98,12 @@ private: std::array base_storage_bindings{}; std::array num_texture_buffers{}; std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4834d58f0..51ff42ee9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,37 +51,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } @@ -253,7 +224,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(program, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -1025,68 +996,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. 
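    // (This per-draw path rebuilt the token arrays on every call; the same logic now
    // runs once per program in GraphicsProgram::GenerateTransformFeedbackState above.)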
- const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array attribs; - std::array streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - const GLsizei num_attribs = static_cast((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1100,11 +1016,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2fdcbe4ba..08f509c19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -193,12 +193,8 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4f26dd74..0a0f1324f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,6 +254,17 @@ Shader::RuntimeInfo 
MakeRuntimeInfo(const GraphicsProgramKey& key, } return info; } + +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -282,7 +293,10 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); - + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { @@ -368,7 +382,8 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos); + std::move(source_program), std::move(assembly_programs), infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } std::unique_ptr ShaderCache::CreateComputeProgram( From 84feabac881443d27f84f8fec5eba6dc3b13b620 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:27:37 -0300 Subject: [PATCH 457/770] glasm: Implement forced early Z --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 8 ++++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index edff04a44..0c2bbf284 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -261,7 +261,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } -void SetupOptions(const IR::Program& program, const Profile& profile, std::string& header) { +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { const Info& info{program.info}; const Stage stage{program.stage}; @@ -296,6 +297,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin header += "OPTION NV_viewport_array2;"; } } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { header += "OPTION ARB_draw_buffers;"; @@ -380,7 +384,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; - SetupOptions(program, profile, header); + SetupOptions(program, profile, runtime_info, header); switch (program.stage) { case Stage::TessellationControl: header += fmt::format("VERTICES_OUT {};", program.invocations); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 
0a0f1324f..e678b4bb2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,8 +219,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key,
             return Shader::TessSpacing::Equal;
         }();
         break;
-    case Shader::Stage::Geometry:
-
+    case Shader::Stage::Fragment:
+        info.force_early_z = key.early_z != 0;
         break;
     default:
         break;

From df406246d9117ba1c428d81ba7466ba0291ece3c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Fri, 21 May 2021 20:36:30 -0300
Subject: [PATCH 458/770] gl_shader_cache: Improve GLASM error print logic

---
 .../renderer_opengl/gl_shader_cache.cpp | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index e678b4bb2..747a133fb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -140,13 +140,16 @@ OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
     glGenProgramsARB(1, &program.handle);
     glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
                             static_cast<GLsizei>(code.size()), code.data());
-    if (!Settings::values.renderer_debug) {
-        return program;
-    }
-    const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
-    if (err && *err) {
-        LOG_CRITICAL(Render_OpenGL, "{}", err);
-        LOG_INFO(Render_OpenGL, "{}", code);
+    if (Settings::values.renderer_debug) {
+        const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+        if (err && *err) {
+            if (std::strstr(err, "error")) {
+                LOG_CRITICAL(Render_OpenGL, "\n{}", err);
+                LOG_INFO(Render_OpenGL, "\n{}", code);
+            } else {
+                LOG_WARNING(Render_OpenGL, "\n{}", err);
+            }
+        }
     }
     return program;
 }

From c31521512fd49603ea42c93e2a6eac5d7985cd78 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Fri, 21 May 2021 20:46:40 -0300
Subject: [PATCH 459/770] gl_shader_cache,glasm: Conditionally use typeless image reads extension

---
 .../backend/glasm/emit_glasm.cpp        |  6 +-
 .../renderer_opengl/gl_shader_cache.cpp | 74 +++++++++----------
 .../renderer_opengl/gl_shader_cache.h   |  2 +
 3 files changed, 43 insertions(+), 39 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 0c2bbf284..8718cc7ec 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -271,8 +271,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile,
         "OPTION NV_shader_storage_buffer;"
         "OPTION NV_gpu_program_fp64;"
         "OPTION NV_bindless_texture;"
-        "OPTION ARB_derivative_control;"
-        "OPTION EXT_shader_image_load_formatted;";
+        "OPTION ARB_derivative_control;";
     if (info.uses_int64_bit_atomics) {
         header += "OPTION NV_shader_atomic_int64;";
     }
@@ -297,6 +296,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile,
             header += "OPTION NV_viewport_array2;";
         }
     }
+    if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
+        header += "OPTION EXT_shader_image_load_formatted;";
+    }
     if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
         header += "OPTION NV_early_fragment_tests;";
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 747a133fb..2c0510f11 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
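// The hunk below drops the file-scope FIXME profile; the same patch re-creates it as
// a ShaderCache member so capability bits come from the Device at runtime instead of
// being hardcoded. A condensed sketch of the pattern, limited to the fields and Device
// getters that appear in this diff:
//
//   profile = Shader::Profile{
//       .support_viewport_index_layer_non_geometry =
//           device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
//       .support_typeless_image_loads = device.HasImageLoadFormatted(),
//       // remaining fields keep their previous hardcoded values
//   };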
@@ -36,42 +36,6 @@ namespace OpenGL { namespace { -// FIXME: Move this somewhere else -const Shader::Profile profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = true, - .support_viewport_mask = true, - .support_typeless_image_loads = true, - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - - .warp_size_potentially_larger_than_guest = true, - .lower_left_origin_mode = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, -}; - using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; @@ -279,7 +243,43 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} {} + state_tracker_} { + profile = Shader::Profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = true, + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + }; +} ShaderCache::~ShaderCache() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b479d073a..b49cd0ac7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,6 +86,8 @@ private: ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; + + Shader::Profile profile; }; } // namespace OpenGL From 
1bccb43cbecdbf069f5c86086670a8d5440408e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:47:48 -0300 Subject: [PATCH 460/770] gl_shader_cache: Conditionally use viewport mask --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2c0510f11..cf03280fa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,7 +266,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_vote = true, .support_viewport_index_layer_non_geometry = device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = true, + .support_viewport_mask = device.HasNvViewportArray2(), .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, From ddf601919f0f415f19082be49f89c80eaba1352b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:55:04 -0300 Subject: [PATCH 461/770] glasm: Fix EmitVertex's optimization --- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index b60c907df..962719ffb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -73,7 +73,7 @@ void EmitEpilogue(EmitContext& ctx) { } void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { - if (stream.type == Type::U32 && stream.imm_u32) { + if (stream.type == Type::U32 && stream.imm_u32 == 0) { ctx.Add("EMIT;"); } else { ctx.Add("EMITS {};", stream); From 77d8c44b68c02bcea0ca07944fc6a87c336d72a5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:59:24 -0300 Subject: [PATCH 462/770] glasm: Implement IsHelperInvocation --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 15380a955..4d2d7a928 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -70,7 +70,7 @@ void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx); -void EmitIsHelperInvocation(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 962719ffb..1e236c487 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -168,8 +168,8 @@ void EmitSampleId(EmitContext& ctx) { 
NotImplemented(); } -void EmitIsHelperInvocation(EmitContext& ctx) { - NotImplemented(); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); } void EmitYDirection(EmitContext& ctx) { From 89e341d56ab49ca38a83ba798c78453243006c12 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 21:01:05 -0300 Subject: [PATCH 463/770] glasm: Implement SampleId --- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 4d2d7a928..2cc3acedf 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -69,7 +69,7 @@ void EmitSetOFlag(EmitContext& ctx); void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); -void EmitSampleId(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 1e236c487..756134d02 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -164,8 +164,8 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); } -void EmitSampleId(EmitContext& ctx) { - NotImplemented(); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); } void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { From e565eb361a3518f61cc7c23cf4fbba3e0a0ea0f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 21:14:11 -0300 Subject: [PATCH 464/770] glasm: Implement gl_Layer stores --- .../backend/glasm/emit_glasm_context_get_set.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 14fdc88f6..7a7297801 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -134,6 +134,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ctx.Add("MOV.F result.clip[{}].x,{};", index, value); break; } + case IR::Attribute::Layer: + if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.layer.x,{};", value); + } else { + // LOG_WARNING + } + break; case IR::Attribute::ViewportIndex: if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.viewport.x,{};", value); From 3a7ca6a7db5d9c895fceef21f9239dfcef2fcc8e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 21:14:59 -0300 Subject: [PATCH 465/770] glasm: Reorder unreachable image atomic insts Reorder them to the bottom of the file for readability. 
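For reference, the gl_Layer store added in the patch above lowers to a single GLASM move
when the stage is geometry or the profile reports
support_viewport_index_layer_non_geometry, e.g. (illustrative only; R2 is a hypothetical
source register):

    MOV.F result.layer.x,R2.x;

Otherwise the store is silently dropped, which the // LOG_WARNING placeholder marks for
follow-up.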
--- .../backend/glasm/emit_glasm_image_atomic.cpp | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp index a0b9866de..f82cf9ffc 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp @@ -8,6 +8,72 @@ namespace Shader::Backend::GLASM { +void EmitImageAtomicIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicInc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicDec32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, + [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitImageAtomicExchange32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { + throw NotImplementedException("GLASM instruction"); +} + void EmitBindlessImageAtomicIAdd32(EmitContext&) { throw LogicError("Unreachable 
instruction"); } @@ -96,70 +162,4 @@ void EmitBoundImageAtomicExchange32(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitImageAtomicIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicInc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicDec32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicExchange32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - } // namespace Shader::Backend::GLASM From c8414e686f30b3bec7f179ee7ab800f223f8ece0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 21:42:48 -0300 Subject: [PATCH 466/770] glasm: Implement image atomics --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/glasm/emit_glasm_image.cpp | 153 ++++++++++++++++ .../backend/glasm/emit_glasm_image_atomic.cpp | 165 ------------------ 3 files changed, 153 insertions(+), 166 
deletions(-)
 delete mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 0d55924a7..becdb7d54 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -13,7 +13,6 @@ add_library(shader_recompiler STATIC
     backend/glasm/emit_glasm_convert.cpp
     backend/glasm/emit_glasm_floating_point.cpp
     backend/glasm/emit_glasm_image.cpp
-    backend/glasm/emit_glasm_image_atomic.cpp
     backend/glasm/emit_glasm_instructions.h
     backend/glasm/emit_glasm_integer.cpp
     backend/glasm/emit_glasm_logical.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 385ca51ac..a7def0897 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -205,6 +205,16 @@ std::string_view FormatStorage(ImageFormat format) {
     }
     throw InvalidArgument("Invalid image format {}", format);
 }
+
+template <typename T>
+void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value,
+                 std::string_view op) {
+    const auto info{inst.Flags<IR::TextureInstInfo>()};
+    const std::string_view type{TextureType(info)};
+    const std::string image{Image(ctx, info, index)};
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
+}
 } // Anonymous namespace
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
@@ -590,6 +600,61 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Re
     ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type);
 }
 
+void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarS32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "MIN.S32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "MIN.U32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarS32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "MAX.S32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                           ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "MAX.U32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                          ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                          ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                          ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "AND.U32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                         ScalarU32 value) {
+    ImageAtomic(ctx, inst, index, coord, value, "OR.U32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord,
+                          ScalarU32 value) {
+
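+    // Each of these thin wrappers funnels into the ImageAtomic helper above, which
+    // emits "ATOMIM.<op>"; per NV_gpu_program5 semantics, the destination register
+    // receives the memory value from before the atomic operation.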
ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + Register coord, ScalarU32 value) { + ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); +} + void EmitBindlessImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -686,4 +751,92 @@ void EmitBoundImageWrite(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp deleted file mode 100644 index f82cf9ffc..000000000 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image_atomic.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLASM { - -void EmitImageAtomicIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarS32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicInc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicDec32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, [[maybe_unused]] Register coords, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitImageAtomicExchange32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] Register coords, [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitBindlessImageAtomicIAdd32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicSMin32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicUMin32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicSMax32(EmitContext&) { - 
throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicUMax32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicInc32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicDec32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicAnd32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicOr32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicXor32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBindlessImageAtomicExchange32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicIAdd32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicSMin32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicUMin32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicSMax32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicUMax32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicInc32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicDec32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicAnd32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicOr32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicXor32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitBoundImageAtomicExchange32(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -} // namespace Shader::Backend::GLASM From e30d4fa976f608a79629e12cac25f7b951e6b8fb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 22 May 2021 18:29:43 -0300 Subject: [PATCH 467/770] glasm: Implement indirect attribute loads --- .../backend/glasm/emit_context.cpp | 3 +- .../backend/glasm/emit_context.h | 1 + .../glasm/emit_glasm_context_get_set.cpp | 65 ++++++++++++++++++- .../backend/glasm/emit_glasm_instructions.h | 2 +- 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 0f7d79843..b5b0e2204 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -30,9 +30,8 @@ bool IsInputArray(Stage stage) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) - : profile{profile_}, runtime_info{runtime_info_} { + : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { // FIXME: Temporary partial implementation - const auto& info{program.info}; u32 cbuf_index{}; for (const auto& desc : info.constant_buffer_descriptors) { if (desc.count != 1) { diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 1f057fdd5..057d74790 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -58,6 +58,7 @@ public: std::string code; RegAlloc reg_alloc{*this}; + const Info& info; const Profile& profile; const RuntimeInfo& runtime_info; diff --git 
a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index 7a7297801..b44c00c73 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -8,6 +8,7 @@
 #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
 #include "shader_recompiler/frontend/ir/value.h"
 #include "shader_recompiler/profile.h"
+#include "shader_recompiler/shader_info.h"
 
 namespace Shader::Backend::GLASM {
 namespace {
@@ -153,9 +154,67 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
     }
 }
 
-void EmitGetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
-                             [[maybe_unused]] ScalarU32 vertex) {
-    throw NotImplementedException("GLASM instruction");
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) {
+    // RC.x = base_index
+    // RC.y = masked_index
+    // RC.z = compare_index
+    ctx.Add("SHR.S RC.x,{},2;"
+            "AND.S RC.y,RC.x,3;"
+            "SHR.S RC.z,{},4;",
+            offset, offset);
+
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    u32 num_endifs{};
+    const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) {
+        ++num_endifs;
+        ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index
+                "IF NE.w;"
+                // X
+                "SEQ.S.CC RC.w,RC.y,0;"
+                "IF NE.w;"
+                "MOV {}.x,{};"
+                "ELSE;"
+                // Y
+                "SEQ.S.CC RC.w,RC.y,1;"
+                "IF NE.w;"
+                "MOV {}.x,{};"
+                "ELSE;"
+                // Z
+                "SEQ.S.CC RC.w,RC.y,2;"
+                "IF NE.w;"
+                "MOV {}.x,{};"
+                "ELSE;"
+                // W
+                "MOV {}.x,{};"
+                "ENDIF;"
+                "ENDIF;"
+                "ENDIF;"
+                "ELSE;",
+                compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]);
+    }};
+    const auto read_swizzled{[&](u32 compare_index, std::string_view value) {
+        const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value),
+                                fmt::format("{}.z", value), fmt::format("{}.w", value)};
+        read(compare_index, values);
+    }};
+    if (ctx.info.loads_position) {
+        const u32 index{static_cast<u32>(IR::Attribute::PositionX)};
+        if (IsInputArray(ctx.stage)) {
+            read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex)));
+        } else {
+            read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
+        }
+    }
+    const u32 base_attribute_value{static_cast<u32>(IR::Attribute::Generic0X) >> 2};
+    for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) {
+        if (!ctx.info.input_generics[index].used) {
+            continue;
+        }
+        read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex)));
+    }
+    for (u32 i = 0; i < num_endifs; ++i) {
+        ctx.Add("ENDIF;");
+    }
 }
 
 void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset,
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 2cc3acedf..e205c3d14 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -51,7 +51,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
-void EmitGetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarU32 vertex);
+void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
 void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch);
 void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value);
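[Editor's note, not part of the patch series] GLASM has no way to index input attributes
dynamically, so the patch above builds a compare ladder over every attribute the shader
declares. The byte-offset arithmetic the ladder encodes is easier to read in plain C++.
The sketch below is a minimal illustration under the assumption of a packed vec4
attribute layout; Vec4 and ReadAttributeIndexed are invented names, not part of the patch:

    #include <cstdint>

    // One input attribute, as the generated code sees it: a 16-byte vec4.
    struct Vec4 {
        float x, y, z, w;
    };

    // Mirrors "SHR.S RC.x,offset,2; AND.S RC.y,RC.x,3; SHR.S RC.z,offset,4;":
    // offset >> 4 selects the attribute slot (compare_index) and
    // (offset >> 2) & 3 selects the component within it (masked_index).
    inline float ReadAttributeIndexed(const Vec4* attributes, uint32_t offset) {
        const uint32_t masked_index = (offset >> 2) & 3;
        const uint32_t compare_index = offset >> 4;
        const Vec4& attr = attributes[compare_index];
        switch (masked_index) {
        case 0:
            return attr.x;
        case 1:
            return attr.y;
        case 2:
            return attr.z;
        default:
            return attr.w;
        }
    }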
From 7fdf0d7d33ed1d0e860d6d1fca97bb66fee0d6a1 Mon Sep 17 00:00:00 2001
From: lat9nq <22451773+lat9nq@users.noreply.github.com>
Date: Sun, 23 May 2021 19:36:59 -0400
Subject: [PATCH 468/770] emit_glasm_context_get_set: Remove unused variable

---
 .../backend/glasm/emit_glasm_context_get_set.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index b44c00c73..787612def 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -205,7 +205,6 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset,
             read_swizzled(index, fmt::format("{}.position", ctx.attrib_name));
         }
     }
-    const u32 base_attribute_value{static_cast<u32>(IR::Attribute::Generic0X) >> 2};
     for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) {
         if (!ctx.info.input_generics[index].used) {
             continue;

From 0794273870dfc0ae9c217b6869845debfc8a8792 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 May 2021 20:16:09 -0300
Subject: [PATCH 469/770] glasm: Implement int64 add and subtract

---
 .../backend/glasm/emit_glasm_instructions.h |  4 ++--
 .../backend/glasm/emit_glasm_integer.cpp    | 10 ++++------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index e205c3d14..4dee9daf9 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -299,9 +299,9 @@ void EmitFPIsNan16(EmitContext& ctx, Register value);
 void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value);
 void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value);
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
-void EmitIAdd64(EmitContext& ctx, Register a, Register b);
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
 void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
-void EmitISub64(EmitContext& ctx, Register a, Register b);
+void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b);
 void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b);
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value);
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index f75fcba47..e5aac14c8 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -37,18 +37,16 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     }
 }
 
-void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a,
-                [[maybe_unused]] Register b) {
-    throw NotImplementedException("GLASM instruction");
+void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) {
+    
ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); } void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { ctx.Add("SUB.S {}.x,{},{};", inst, a, b); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register a, - [[maybe_unused]] Register b) { - throw NotImplementedException("GLASM instruction"); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { + ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); } void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { From c721767bccbcbd481c0e545b707754b270a7ea02 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 20:17:05 -0300 Subject: [PATCH 470/770] glasm: Implement global memory fallbacks --- .../backend/glasm/emit_glasm_instructions.h | 22 ++-- .../backend/glasm/emit_glasm_memory.cpp | 117 ++++++++++++------ 2 files changed, 89 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 4dee9daf9..5e038b332 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -79,17 +79,17 @@ void EmitUndefU8(EmitContext& ctx); void EmitUndefU16(EmitContext& ctx); void EmitUndefU32(EmitContext& ctx); void EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx, Register address); -void EmitLoadGlobal64(EmitContext& ctx, Register address); -void EmitLoadGlobal128(EmitContext& ctx, Register address); -void EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index dd307a9a3..33af83212 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -29,8 +29,39 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, } } +void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr, + std::string_view else_expr = {}) { + const size_t 
num_buffers{ctx.info.storage_buffers_descriptors.size()};
+    for (size_t index = 0; index < num_buffers; ++index) {
+        if (!ctx.info.nvn_buffer_used[index]) {
+            continue;
+        }
+        const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
+        ctx.Add("LDC.U64 DC.x,c{}[{}];"     // ssbo_addr
+                "LDC.U32 RC.x,c{}[{}];"     // ssbo_size_u32
+                "CVT.U64.U32 DC.y,RC.x;"    // ssbo_size = ssbo_size_u32
+                "ADD.U64 DC.y,DC.y,DC.x;"   // ssbo_end = ssbo_addr + ssbo_size
+                "SGE.U64 RC.x,{}.x,DC.x;"   // a = input_addr >= ssbo_addr ? -1 : 1
+                "SLT.U64 RC.y,{}.x,DC.y;"   // b = input_addr < ssbo_end ? -1 : 1
+                "AND.U.CC RC.x,RC.x,RC.y;"
+                "IF NE.x;"                  // a && b
+                "SUB.U64 DC.x,{}.x,DC.x;"   // offset = input_addr - ssbo_addr
+                "PK64.U DC.y,c[{}];"        // host_ssbo = cbuf
+                "ADD.U64 DC.x,DC.x,DC.y;"   // host_addr = host_ssbo + offset
+                "{}"
+                "ELSE;",
+                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8,
+                address, address, address, index, then_expr);
+    }
+    if (!else_expr.empty()) {
+        ctx.Add("{}", else_expr);
+    }
+    for (size_t index = 0; index < num_buffers; ++index) {
+        ctx.Add("ENDIF;");
+    }
+}
+
 template <typename ValueType>
-void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
+void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
            std::string_view size) {
     StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
 }
@@ -41,65 +72,73 @@ void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32
     StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
               fmt::format("MOV.U {},{{0,0,0,0}};", ret));
 }
+
+template <typename ValueType>
+void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
+    GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value));
+}
+
+void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret),
+                    fmt::format("MOV.S {},0;", ret));
+}
 } // Anonymous namespace
 
-void EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "U8");
 }
 
-void EmitLoadGlobalS8([[maybe_unused]] EmitContext& ctx) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "S8");
 }
 
-void EmitLoadGlobalU16([[maybe_unused]] EmitContext& ctx) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "U16");
 }
 
-void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "S16");
 }
 
-void EmitLoadGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "U32");
 }
 
-void EmitLoadGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) {
-    throw NotImplementedException("GLASM instruction");
+void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) {
+    GlobalLoad(ctx, inst, address, "U32X2");
 }
 
-void 
EmitLoadGlobal128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address) { - throw NotImplementedException("GLASM instruction"); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { + GlobalLoad(ctx, inst, address, "U32X4"); } -void EmitWriteGlobalU8([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U8"); } -void EmitWriteGlobalS8([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S8"); } -void EmitWriteGlobalU16([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U16"); } -void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "S16"); } -void EmitWriteGlobal32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, - [[maybe_unused]] ScalarU32 value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { + GlobalWrite(ctx, address, value, "U32"); } -void EmitWriteGlobal64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, - [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X2"); } -void EmitWriteGlobal128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register address, - [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); +void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { + GlobalWrite(ctx, address, value, "U32X4"); } void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -139,37 +178,37 @@ void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ScalarU32 value) { - Store(ctx, binding, offset, value, "U8"); + Write(ctx, binding, offset, value, "U8"); } void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ScalarS32 value) { - Store(ctx, binding, offset, value, "S8"); + Write(ctx, binding, offset, value, "S8"); } void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ScalarU32 value) { - Store(ctx, binding, offset, value, "U16"); + Write(ctx, binding, offset, value, "U16"); } void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ScalarS32 value) { - Store(ctx, binding, offset, value, "S16"); + Write(ctx, binding, offset, value, "S16"); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ScalarU32 value) { - Store(ctx, binding, offset, value, "U32"); + Write(ctx, binding, offset, value, "U32"); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, Register value) { - Store(ctx, binding, offset, value, "U32X2"); + Write(ctx, binding, offset, value, "U32X2"); } void 
EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, Register value) { - Store(ctx, binding, offset, value, "U32X4"); + Write(ctx, binding, offset, value, "U32X4"); } } // namespace Shader::Backend::GLASM From 80884e32701e1e93fded045be4c235ff143d6ea0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 21:24:24 -0300 Subject: [PATCH 471/770] gl_graphics_program: Fix texture buffer bindings --- .../renderer_opengl/gl_graphics_program.cpp | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 9677a3ed6..7c3d23f85 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "common/cityhash.h" @@ -14,12 +15,24 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -77,30 +90,25 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } u32 num_textures{}; u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - for (const auto& desc : info.constant_buffer_descriptors) { - base_uniform_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.storage_buffers_descriptors) { - base_storage_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers[stage] += desc.count; - num_textures += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers[stage] += desc.count; - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += 
AccumulateCount(info.image_descriptors); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); @@ -151,8 +159,8 @@ void GraphicsProgram::Configure(bool is_indexed) { const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v || + std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; @@ -297,6 +305,9 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_binding += num_texture_buffers[stage]; image_binding += num_image_buffers[stage]; + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { From 40179282137370380387cab2610dcf21bd709efa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 May 2021 03:24:19 -0300 Subject: [PATCH 472/770] gl_shader_cache: Do not flip tessellation on OpenGL --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cf03280fa..ceec83a8a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -159,8 +159,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... 
- info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { case Maxwell::TessellationPrimitive::Isolines: From 9fbfe7d676790dea160368eda6492e8feb6e2f4a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 May 2021 20:59:49 -0300 Subject: [PATCH 473/770] glasm: Fix usage counting on phi nodes --- .../backend/glasm/emit_glasm.cpp | 12 +++++++++--- .../backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 16 ++++++++++++---- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 8718cc7ec..2ce839059 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -178,6 +178,10 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + void Precolor(EmitContext& ctx, const IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { @@ -194,11 +198,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) { default: throw NotImplementedException("Phi node type {}", phi.Type()); } + // Insert phi moves before references to avoid overwritting them const size_t num_args{phi.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.PhiMove(phi, phi.Arg(i)); + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i)); } - // Add reference to the phi node on the phi predecessor to avoid overwritting it for (size_t i = 0; i < num_args; ++i) { IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); } @@ -237,7 +243,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } else { ctx.Add("MOV.S.CC RC,{};" - "BRK (EQ.x);" + "BRK(EQ.x);" "ENDREP;", eval(node.data.repeat.cond)); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 5e038b332..cc7aa8e20 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -23,7 +23,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitReference(EmitContext&); +void EmitReference(EmitContext&, const IR::Value& value); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); void EmitJoin(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 756134d02..3d7a3ebb4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -17,18 +17,26 @@ namespace Shader::Backend::GLASM { #define NotImplemented() throw NotImplementedException("GLASM instruction {}", 
__LINE__)
 
-void EmitPhi(EmitContext&, IR::Inst&) {}
+void EmitPhi(EmitContext& ctx, IR::Inst& inst) {
+    const size_t num_args{inst.NumArgs()};
+    for (size_t i = 0; i < num_args; ++i) {
+        ctx.reg_alloc.Consume(inst.Arg(i));
+    }
+}
 
 void EmitVoid(EmitContext&) {}
 
-void EmitReference(EmitContext&) {}
+void EmitReference(EmitContext& ctx, const IR::Value& value) {
+    ctx.reg_alloc.Consume(value);
+}
 
 void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) {
+    const Register phi_reg{ctx.reg_alloc.Consume(phi)};
+    const Value eval_value{ctx.reg_alloc.Consume(value)};
+
     if (phi == value) {
         return;
     }
-    const Register phi_reg{ctx.reg_alloc.Consume(phi)};
-    const Value eval_value{ctx.reg_alloc.Consume(value)};
     switch (phi.Inst()->Flags<IR::Type>()) {
     case IR::Type::U1:
     case IR::Type::U32:
     case IR::Type::F32:

From ca05a13c62ad7693f8be924c168e400e8139b0d2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 25 May 2021 02:22:21 -0300
Subject: [PATCH 474/770] glasm: Catch more register leaks

Add support for null registers. These are used when an instruction has
no usages, which comes in handy when an instruction is only consumed
for its CC value. That case carries the caveat of having to invalidate
all pseudo-instructions before defining the instruction itself in the
register allocator, so this commit reorders the affected emit functions
accordingly.

Also work around a bug in Nvidia's compiler where conditional execution
on condition codes misbehaves, by using branches instead.
---
 .../backend/glasm/emit_glasm.cpp              |  8 ++-
 .../glasm/emit_glasm_bitwise_conversion.cpp   |  8 ++-
 .../backend/glasm/emit_glasm_composite.cpp    |  4 +-
 .../backend/glasm/emit_glasm_image.cpp        | 27 +++++----
 .../backend/glasm/emit_glasm_integer.cpp      | 58 +++++++++++++------
 .../backend/glasm/emit_glasm_warp.cpp         |  6 +-
 .../backend/glasm/reg_alloc.cpp               | 28 +++++++--
 .../backend/glasm/reg_alloc.h                 | 16 +++--
 8 files changed, 114 insertions(+), 41 deletions(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 2ce839059..4aa3682c2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
             for (size_t i = 0; i < num_args; ++i) {
                 IR::Block& phi_block{*phi.PhiBlock(i)};
                 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
-                IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i));
+                IR::IREmitter ir{phi_block, it};
+                const IR::Value arg{phi.Arg(i)};
+                if (arg.IsImmediate()) {
+                    ir.PhiMove(phi, arg);
+                } else {
+                    ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+                }
             }
             for (size_t i = 0; i < num_args; ++i) {
                 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 808c72105..9201ccd39 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
 }
 
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
-    ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)});
+    // Fake one usage to get a real register out of the condition
+    inst.DestructiveAddUsage(1);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+    if (ret != input) {
+        ctx.Add("MOV.S {},{};", ret, 
input); + } } void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index d829f05b3..bff0b7c1c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec // The input composite is not aliased with the return value so we have to copy it before // hand. But the insert object is not aliased with the return value, so we don't have to // worry about that - ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object); + ctx.Add("MOV.{} {},{};" + "MOV.{} {}.{},{};", + type, ret, composite, type, ret, swizzle, object); } else { // The return value is alised so we can just insert the object, it doesn't matter if it's // aliased diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index a7def0897..34725b8c6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { ctx.Add("MOV.S {},-1;" "MOV.S {}(NONRESIDENT),0;", sparse_ret, sparse_ret); - sparse_inst->Invalidate(); } std::string_view FormatStorage(ImageFormat format) { @@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis const Register ret{ctx.reg_alloc.Define(inst)}; ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); } + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, Register bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; const std::string_view type{TextureType(info)}; @@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: } const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: } const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; const auto info{inst.Flags()}; const char comp{"xyzw"[info.gather_component]}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde // Allocate offsets early so they don't overwrite any consumed register const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, dpdx = ScopedRegister{ctx.reg_alloc}; dpdy = ScopedRegister{ctx.reg_alloc}; } - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view format{FormatStorage(info.image_format)}; const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index e5aac14c8..e9d1bae05 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -9,6 +9,17 @@ namespace Shader::Backend::GLASM { void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (IR::Inst* const flag_inst : flags) { + if (flag_inst) { + flag_inst->Invalidate(); + } + } const bool cc{inst.HasAssociatedPseudoOperation()}; const std::string_view cc_mod{cc ? ".CC" : ""}; if (cc) { @@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { if (!cc) { return; } - static constexpr std::array masks{"EQ", "SF", "CF", "OF"}; - const std::array flags{ - inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), - }; - for (size_t i = 0; i < flags.size(); ++i) { - if (flags[i]) { - const auto flag_ret{ctx.reg_alloc.Define(*flags[i])}; - ctx.Add("MOV.S {},0;" - "MOV.S {}({}.x),-1;", - flag_ret, flag_ret, masks[i]); - flags[i]->Invalidate(); + static constexpr std::array masks{"", "SF", "CF", "OF"}; + for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { + if (!flags[flag_index]) { + continue; + } + const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; + if (flag_index == 0) { + ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); + } else { + // We could use conditional execution here, but it's broken on Nvidia's compiler + ctx.Add("IF {}.x;" + "MOV.S {}.x,-1;" + "ELSE;" + "MOV.S {}.x,0;" + "ENDIF;", + masks[flag_index], flag_ret, flag_ret); } } } @@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, ScalarU32 count) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } const Register ret{ctx.reg_alloc.Define(inst)}; if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); @@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal "BFE.U {},RC,{};", count, offset, ret, base); } - if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) { + if (zero) { ctx.Add("SEQ.S {},{},0;", *zero, ret); - zero->Invalidate(); } - if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) { + if (sign) { ctx.Add("SLT.S {},{},0;", *sign, ret); - sign->Invalidate(); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 
af0e13d43..6e30790bb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, const IR::Value& clamp, const IR::Value& segmentation_mask, std::string_view op) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + in_bounds->Invalidate(); + } std::string mask; if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); @@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 ScalarU32{ctx.reg_alloc.Consume(clamp)}); } const Register value_ret{ctx.reg_alloc.Define(inst)}; - IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; if (in_bounds) { const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; ctx.Add("SHF{}.U {},{},{},{};" "MOV.U {}.x,{}.y;", op, bounds_ret, value, index, mask, value_ret, bounds_ret); - in_bounds->Invalidate(); } else { ctx.Add("SHF{}.U {},{},{},{};" "MOV.U {}.x,{}.y;", diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 707b22247..c55a833c6 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) { } Value RegAlloc::Peek(const IR::Value& value) { - return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive()); + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return PeekInst(*value.Inst()); + } } Value RegAlloc::Consume(const IR::Value& value) { - return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return ConsumeInst(*value.Inst()); + } } void RegAlloc::Unref(IR::Inst& inst) { @@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) { } Register RegAlloc::Define(IR::Inst& inst, bool is_long) { - inst.SetDefinition(Alloc(is_long)); + if (inst.HasUses()) { + inst.SetDefinition(Alloc(is_long)); + } else { + Id id{}; + id.is_long.Assign(is_long ? 1 : 0); + id.is_null.Assign(1); + inst.SetDefinition(id); + } return Register{PeekInst(inst)}; } @@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) { num_regs = std::max(num_regs, reg + 1); use[reg] = true; Id ret{}; - ret.index.Assign(static_cast(reg)); + ret.is_valid.Assign(1); ret.is_long.Assign(is_long ? 
1 : 0); ret.is_spill.Assign(0); ret.is_condition_code.Assign(0); + ret.is_null.Assign(0); + ret.index.Assign(static_cast(reg)); return ret; } } @@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) { } void RegAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid register"); + } if (id.is_spill != 0) { throw NotImplementedException("Free spill"); } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 41b7c92be..b97c84146 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -35,10 +35,12 @@ enum class Type : u32 { struct Id { union { u32 raw; - BitField<0, 29, u32> index; - BitField<29, 1, u32> is_long; - BitField<30, 1, u32> is_spill; - BitField<31, 1, u32> is_condition_code; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; }; bool operator==(Id rhs) const noexcept { @@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) { throw NotImplementedException("Spill emission"); } if constexpr (scalar) { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); + } if (id.is_long != 0) { return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); } else { return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); } } else { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); + } if (id.is_long != 0) { return fmt::format_to(ctx.out(), "D{}", id.index.Value()); } else { From 379b305b4bc09799d53981fa6e5d9cbe6be99561 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 02:34:08 -0300 Subject: [PATCH 475/770] glasm: Throw when there are register leaks --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +++ src/shader_recompiler/backend/glasm/reg_alloc.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4aa3682c2..0e9dc06a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -271,6 +271,9 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { break; } } + if (!ctx.reg_alloc.IsEmpty()) { + throw LogicError("Register allocator is not empty"); + } } void SetupOptions(const IR::Program& program, const Profile& profile, diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index b97c84146..019e1bc0f 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -128,6 +128,10 @@ public: return num_used_long_registers; } + [[nodiscard]] bool IsEmpty() const noexcept { + return register_use.none() && long_register_use.none(); + } + /// Returns true if the instruction is expected to be aliased to another static bool IsAliased(const IR::Inst& inst); From 75fd0079db9ac2f3bc6bcf182ed080a58538ed06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 02:46:51 -0300 Subject: [PATCH 476/770] glasm: Remove unnecessary value types --- .../backend/glasm/emit_glasm.cpp | 12 +------ .../backend/glasm/reg_alloc.cpp | 8 ++--- .../backend/glasm/reg_alloc.h | 33 +------------------ 3 files changed, 6 insertions(+), 47 deletions(-) diff --git 
a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0e9dc06a6..5ffefaad2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -49,8 +49,7 @@ public: inst = ir_value.InstRecursive(); reg = Register{value}; } else { - const bool is_long{value.type == Type::F64 || value.type == Type::U64}; - reg = is_long ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); + reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); } switch (value.type) { case Type::Register: @@ -59,18 +58,9 @@ public: case Type::U32: ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); break; - case Type::S32: - ctx.Add("MOV.S {}.x,{};", reg, value.imm_s32); - break; - case Type::F32: - ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); - break; case Type::U64: ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); break; - case Type::F64: - ctx.Add("MOV.F64 {}.x,{};", reg, value.imm_f64); - break; } } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index c55a833c6..4c046db6e 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -78,16 +78,16 @@ Value RegAlloc::MakeImm(const IR::Value& value) { ret.imm_u32 = value.U32(); break; case IR::Type::F32: - ret.type = Type::F32; - ret.imm_f32 = value.F32(); + ret.type = Type::U32; + ret.imm_u32 = Common::BitCast(value.F32()); break; case IR::Type::U64: ret.type = Type::U64; ret.imm_u64 = value.U64(); break; case IR::Type::F64: - ret.type = Type::F64; - ret.imm_f64 = value.F64(); + ret.type = Type::U64; + ret.imm_u64 = Common::BitCast(value.F64()); break; default: throw NotImplementedException("Immediate type {}", value.Type()); diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 019e1bc0f..5a703daf2 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -26,10 +26,7 @@ enum class Type : u32 { Void, Register, U32, - S32, - F32, U64, - F64, }; struct Id { @@ -57,10 +54,7 @@ struct Value { union { Id id; u32 imm_u32; - s32 imm_s32; - f32 imm_f32; u64 imm_u64; - f64 imm_f64; }; bool operator==(const Value& rhs) const noexcept { @@ -74,14 +68,8 @@ struct Value { return id == rhs.id; case Type::U32: return imm_u32 == rhs.imm_u32; - case Type::S32: - return imm_s32 == rhs.imm_s32; - case Type::F32: - return Common::BitCast(imm_f32) == Common::BitCast(rhs.imm_f32); case Type::U64: return imm_u64 == rhs.imm_u64; - case Type::F64: - return Common::BitCast(imm_f64) == Common::BitCast(rhs.imm_f64); } return false; } @@ -245,12 +233,7 @@ struct fmt::formatter { return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: return fmt::format_to(ctx.out(), "{}", value.imm_u32); - case Shader::Backend::GLASM::Type::S32: - return fmt::format_to(ctx.out(), "{}", static_cast(value.imm_s32)); - case Shader::Backend::GLASM::Type::F32: - return fmt::format_to(ctx.out(), "{}", Common::BitCast(value.imm_f32)); case Shader::Backend::GLASM::Type::U64: - case Shader::Backend::GLASM::Type::F64: break; } throw Shader::InvalidArgument("Invalid value type {}", value.type); @@ -271,12 +254,7 @@ struct fmt::formatter { return Shader::Backend::GLASM::FormatTo(ctx, value.id); case Shader::Backend::GLASM::Type::U32: return fmt::format_to(ctx.out(), "{}", static_cast(value.imm_u32)); - case 
Shader::Backend::GLASM::Type::S32:
-            return fmt::format_to(ctx.out(), "{}", value.imm_s32);
-        case Shader::Backend::GLASM::Type::F32:
-            return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32));
+        case Shader::Backend::GLASM::Type::U64:
-        case Shader::Backend::GLASM::Type::F64:
             break;
         }
         throw Shader::InvalidArgument("Invalid value type {}", value.type);
@@ -296,13 +274,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> {
         case Shader::Backend::GLASM::Type::Register:
             return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
         case Shader::Backend::GLASM::Type::U32:
-            return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
-        case Shader::Backend::GLASM::Type::S32:
-            return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_s32));
-        case Shader::Backend::GLASM::Type::F32:
-            return fmt::format_to(ctx.out(), "{}", value.imm_f32);
+            return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32));
         case Shader::Backend::GLASM::Type::U64:
             break;
         }
         throw Shader::InvalidArgument("Invalid value type {}", value.type);
@@ -322,13 +295,9 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> {
         case Shader::Backend::GLASM::Type::Register:
             return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id);
         case Shader::Backend::GLASM::Type::U32:
-        case Shader::Backend::GLASM::Type::S32:
-        case Shader::Backend::GLASM::Type::F32:
             break;
         case Shader::Backend::GLASM::Type::U64:
             return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64));
-        case Shader::Backend::GLASM::Type::F64:
-            return fmt::format_to(ctx.out(), "{}", value.imm_f64);
         }
         throw Shader::InvalidArgument("Invalid value type {}", value.type);
     }

From 70c9281fbffbf5f68a267ba85f6e4b3c122fa3ac Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 25 May 2021 02:48:03 -0300
Subject: [PATCH 477/770] glasm: Fix INeg32 on negative immediates

---
 src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e9d1bae05..cea45a3e0 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -67,7 +67,11 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
 }
 
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) {
-    ctx.Add("MOV.S {},-{};", inst, value);
+    if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) {
+        ctx.Add("MOV.S {},{};", inst, -static_cast<s64>(value.imm_u32));
+    } else {
+        ctx.Add("MOV.S {},-{};", inst, value);
+    }
 }
 
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) {

From 77ee733c3ac8b78194c4214330237f641712c1d6 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 25 May 2021 03:20:37 -0300
Subject: [PATCH 478/770] glasm: Remove unintentionally committed fmt::prints

---
 src/shader_recompiler/ir_opt/verification_pass.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 207355ecc..975d5aadf 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -66,7 +66,6 @@ static void ValidateForwardDeclarations(const IR::Program& program) {
                 continue;
             }
             if (!definitions.contains(inst.Arg(arg).Inst())) {
-                fmt::print("{}\n", IR::DumpBlock(*block));
                 throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
} @@ -80,7 +79,6 @@ static void ValidatePhiNodes(const IR::Program& program) { for (const IR::Inst& inst : *block) { if (inst.GetOpcode() == IR::Opcode::Phi) { if (no_more_phis) { - fmt::print("{}\n", IR::DumpBlock(*block)); throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); } } else { From 48aafe0961a2ddfb52b627c6ba6bce8276330550 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 17:37:35 -0300 Subject: [PATCH 479/770] glasm: Release phi node registers after they are no longer needed --- .../backend/glasm/emit_glasm.cpp | 53 ++++++++----------- .../glasm/emit_glasm_not_implemented.cpp | 39 +++++++++++--- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 5ffefaad2..4f838b699 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -172,38 +172,29 @@ bool IsReference(IR::Inst& inst) { return inst.GetOpcode() == IR::Opcode::Reference; } -void Precolor(EmitContext& ctx, const IR::Program& program) { +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwritting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { - switch (phi.Arg(0).Type()) { - case IR::Type::U1: - case IR::Type::U32: - case IR::Type::F32: - ctx.reg_alloc.Define(phi); - break; - case IR::Type::U64: - case IR::Type::F64: - ctx.reg_alloc.LongDefine(phi); - break; - default: - throw NotImplementedException("Phi node type {}", phi.Type()); - } - // Insert phi moves before references to avoid overwritting them - const size_t num_args{phi.NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - IR::Block& phi_block{*phi.PhiBlock(i)}; - auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; - IR::IREmitter ir{phi_block, it}; - const IR::Value arg{phi.Arg(i)}; - if (arg.IsImmediate()) { - ir.PhiMove(phi, arg); - } else { - ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); - } - } - for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); - } + PrecolorInst(phi); } } } @@ -388,7 +379,7 @@ std::string_view GetTessSpacing(TessSpacing spacing) { std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings) { EmitContext ctx{program, bindings, profile, runtime_info}; - Precolor(ctx, program); + Precolor(program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; SetupOptions(program, profile, runtime_info, header); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 3d7a3ebb4..e9d1e0d6b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp 
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -17,10 +17,30 @@ namespace Shader::Backend::GLASM { #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) -void EmitPhi(EmitContext& ctx, IR::Inst& inst) { - const size_t num_args{inst.NumArgs()}; +static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { + switch (phi.Arg(0).Type()) { + case IR::Type::U1: + case IR::Type::U32: + case IR::Type::F32: + ctx.reg_alloc.Define(phi); + break; + case IR::Type::U64: + case IR::Type::F64: + ctx.reg_alloc.LongDefine(phi); + break; + default: + throw NotImplementedException("Phi node type {}", phi.Type()); + } +} + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { - ctx.reg_alloc.Consume(inst.Arg(i)); + ctx.reg_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); } } @@ -30,14 +50,19 @@ void EmitReference(EmitContext& ctx, const IR::Value& value) { ctx.reg_alloc.Consume(value); } -void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { - const Register phi_reg{ctx.reg_alloc.Consume(phi)}; +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + DefinePhi(ctx, phi); + } + const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; const Value eval_value{ctx.reg_alloc.Consume(value)}; - if (phi == value) { + if (phi_reg == eval_value) { return; } - switch (phi.Inst()->Flags()) { + switch (phi.Flags()) { case IR::Type::U1: case IR::Type::U32: case IR::Type::F32: From eacf18cce9a05a28f50e916a752c04b0c9337707 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:28 -0300 Subject: [PATCH 480/770] gl_shader_cache: Rename Program abstractions into Pipeline --- src/video_core/CMakeLists.txt | 8 ++-- ...te_program.cpp => gl_compute_pipeline.cpp} | 18 ++++---- ...ompute_program.h => gl_compute_pipeline.h} | 28 ++++++------- ...s_program.cpp => gl_graphics_pipeline.cpp} | 28 ++++++------- ...phics_program.h => gl_graphics_pipeline.h} | 38 ++++++++--------- .../renderer_opengl/gl_rasterizer.cpp | 10 ++--- .../renderer_opengl/gl_rasterizer.h | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 42 +++++++++---------- .../renderer_opengl/gl_shader_cache.h | 32 +++++++------- .../renderer_opengl/gl_shader_manager.h | 2 +- 10 files changed, 104 insertions(+), 104 deletions(-) rename src/video_core/renderer_opengl/{gl_compute_program.cpp => gl_compute_pipeline.cpp} (91%) rename src/video_core/renderer_opengl/{gl_compute_program.h => gl_compute_pipeline.h} (58%) rename src/video_core/renderer_opengl/{gl_graphics_program.cpp => gl_graphics_pipeline.cpp} (93%) rename src/video_core/renderer_opengl/{gl_graphics_program.h => gl_graphics_pipeline.h} (70%) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8250f736c..1ef3a6189 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,14 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h - renderer_opengl/gl_compute_program.cpp - renderer_opengl/gl_compute_program.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h 
renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_graphics_program.cpp - renderer_opengl/gl_graphics_program.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp similarity index 91% rename from src/video_core/renderer_opengl/gl_compute_program.cpp rename to src/video_core/renderer_opengl/gl_compute_pipeline.cpp index ce52a0052..700ebd8b8 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -5,7 +5,7 @@ #include <cstring> #include "common/cityhash.h" -#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL { @@ -17,20 +17,20 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; -size_t ComputeProgramKey::Hash() const noexcept { +size_t ComputePipelineKey::Hash() const noexcept { return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); } -bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { return std::memcmp(this, &rhs, sizeof *this) == 0; } -ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) +ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { @@ -53,7 +53,7 @@ ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer ASSERT(num_images <= MAX_IMAGES); } -void ComputeProgram::Configure() { +void ComputePipeline::Configure() { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h similarity index 58% rename from src/video_core/renderer_opengl/gl_compute_program.h rename to src/video_core/renderer_opengl/gl_compute_pipeline.h index ddb00dc1d..e3b94e2f3 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -30,30 +30,30 @@ namespace OpenGL { class ProgramManager; -struct ComputeProgramKey { +struct ComputePipelineKey { u64 unique_hash; u32 shared_memory_size; std::array<u32, 3> workgroup_size; size_t Hash() const noexcept; - bool operator==(const ComputeProgramKey&) const noexcept; + bool operator==(const ComputePipelineKey&) const noexcept; - bool operator!=(const
ComputeProgramKey& rhs) const noexcept { + bool operator!=(const ComputePipelineKey& rhs) const noexcept { return !operator==(rhs); } }; -static_assert(std::has_unique_object_representations_v<ComputeProgramKey>); -static_assert(std::is_trivially_copyable_v<ComputeProgramKey>); -static_assert(std::is_trivially_constructible_v<ComputeProgramKey>); +static_assert(std::has_unique_object_representations_v<ComputePipelineKey>); +static_assert(std::is_trivially_copyable_v<ComputePipelineKey>); +static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); -class ComputeProgram { +class ComputePipeline { public: - explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); @@ -76,8 +76,8 @@ private: namespace std { template <> -struct hash<OpenGL::ComputeProgramKey> { - size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { +struct hash<OpenGL::ComputePipelineKey> { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp similarity index 93% rename from src/video_core/renderer_opengl/gl_graphics_program.cpp rename to src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 7c3d23f85..32df35202 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -7,7 +7,7 @@ #include "common/cityhash.h" #include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/texture_cache/texture_cache.h" @@ -62,22 +62,22 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { } } // Anonymous namespace -size_t GraphicsProgramKey::Hash() const noexcept { +size_t GraphicsPipelineKey::Hash() const noexcept { return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); } -bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { +bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array<OGLAssemblyProgram, 5> assembly_programs_, - const std::array<const Shader::Info*, 5>& infos, - const VideoCommon::TransformFeedbackState* xfb_state) +GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array<OGLAssemblyProgram, 5> assembly_programs_, + const std::array<const Shader::Info*, 5>& infos, + const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
program_manager{program_manager_}, state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( @@ -126,7 +126,7 @@ struct Spec { static constexpr bool has_images = true; }; -void GraphicsProgram::Configure(bool is_indexed) { +void GraphicsPipeline::Configure(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -347,7 +347,7 @@ void GraphicsProgram::Configure(bool is_indexed) { } } -void GraphicsProgram::GenerateTransformFeedbackState( +void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. @@ -394,7 +394,7 @@ void GraphicsProgram::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsProgram::ConfigureTransformFeedbackImpl() const { +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); } diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h similarity index 70% rename from src/video_core/renderer_opengl/gl_graphics_program.h rename to src/video_core/renderer_opengl/gl_graphics_pipeline.h index 53a57ede5..62f700cf5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -24,7 +24,7 @@ class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsProgramKey { +struct GraphicsPipelineKey { std::array unique_hashes; union { u32 raw; @@ -40,34 +40,34 @@ struct GraphicsProgramKey { size_t Hash() const noexcept; - bool operator==(const GraphicsProgramKey&) const noexcept; + bool operator==(const GraphicsPipelineKey&) const noexcept; - bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { return !operator==(rhs); } [[nodiscard]] size_t Size() const noexcept { if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); + return sizeof(GraphicsPipelineKey); } else { - return offsetof(GraphicsProgramKey, padding); + return offsetof(GraphicsPipelineKey, padding); } } }; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { +class GraphicsPipeline { public: - explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); + explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); void Configure(bool is_indexed); @@ -110,8 +110,8 
@@ private: namespace std { template <> -struct hash { - size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { +struct hash { + size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 51ff42ee9..72a6dfd2a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -218,13 +218,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; + GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program->Configure(is_indexed); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(program, primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -271,7 +271,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; if (!program) { return; } @@ -996,7 +996,7 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 08f509c19..afd43b2ee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -194,7 +194,7 @@ private: void SyncVertexInstances(); /// Begin a transform feedback - void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ceec83a8a..33757938a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -152,7 +152,7 @@ GLenum AssemblyStage(size_t stage_index) { return GL_NONE; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); @@ -282,7 +282,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; -GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; } @@ -302,18 +302,18 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { - 
program = CreateGraphicsProgram(); + program = CreateGraphicsPipeline(); } return program.get(); } -ComputeProgram* ShaderCache::CurrentComputeProgram() { +ComputePipeline* ShaderCache::CurrentComputePipeline() { const VideoCommon::ShaderInfo* const shader{ComputeShader()}; if (!shader) { return nullptr; } const auto& qmd{kepler_compute.launch_description}; - const ComputeProgramKey key{ + const ComputePipelineKey key{ .unique_hash = shader->unique_hash, .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, @@ -323,20 +323,20 @@ ComputeProgram* ShaderCache::CurrentComputeProgram() { if (!is_new) { return pipeline.get(); } - pipeline = CreateComputeProgram(key, shader); + pipeline = CreateComputePipeline(key, shader); return pipeline.get(); } -std::unique_ptr ShaderCache::CreateGraphicsProgram() { +std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GraphicsEnvironments environments; GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); + return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); } -std::unique_ptr ShaderCache::CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, +std::unique_ptr ShaderCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{0}; @@ -382,27 +382,27 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( if (!device.UseAssemblyShaders()) { LinkProgram(source_program.handle); } - return std::make_unique( + return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } -std::unique_ptr ShaderCache::CreateComputeProgram( - const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { +std::unique_ptr ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputeProgram(main_pools, key, env, true); + return CreateComputePipeline(main_pools, key, env, true); } -std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel) { +std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -418,9 +418,9 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, program.info, - std::move(source_program), std::move(asm_program)); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b49cd0ac7..a56559ea9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -15,8 +15,8 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -55,24 +55,24 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); - [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - std::unique_ptr CreateGraphicsProgram(); + std::unique_ptr CreateGraphicsPipeline(); - std::unique_ptr CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel); - std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, - const VideoCommon::ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel); + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + 
bool build_in_parallel); Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -81,11 +81,11 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - GraphicsProgramKey graphics_key{}; + GraphicsPipelineKey graphics_key{}; ShaderPools main_pools; - std::unordered_map> graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; Shader::Profile profile; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 5ec57d707..88b734bcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,8 +9,8 @@ #include -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { From a49532c8eb29807814214ab326ff970f5a964a03 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:52 -0300 Subject: [PATCH 481/770] video_core,shader: Clang-format fixes --- src/shader_recompiler/frontend/ir/patch.cpp | 2 +- src/shader_recompiler/frontend/ir/post_order.h | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp index 1f770bc48..4c956a970 100644 --- a/src/shader_recompiler/frontend/ir/patch.cpp +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/patch.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 58a0467a0..07bfbadc3 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -4,8 +4,8 @@ #pragma once -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e39b65bd..d50647ba7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6600fb142..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,11 +6,11 @@ #include #include -#include #include #include #include #include +#include #include "common/alignment.h" #include "common/common_types.h" From a41b2ed3917f9ca5af30773e4671f4829380dceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 20:39:55 -0300 Subject: [PATCH 482/770] gl_shader_cache: Add disk shader cache --- .../renderer_opengl/gl_rasterizer.cpp | 4 +- .../renderer_opengl/gl_shader_cache.cpp | 113 +++++++++++++++++- .../renderer_opengl/gl_shader_cache.h | 10 +- 3 files changed, 116 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 72a6dfd2a..eec01e8c2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,7 +140,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) {} + const VideoCore::DiskResourceLoadCallback& callback) { + shader_cache.LoadDiskResources(title_id, stop_loading, callback); +} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 33757938a..3aa5ac31d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,17 +3,19 @@ // Refer to the license.txt file included. 
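// --- Editor's note (illustrative sketch, not part of the patch series) --------
// The disk-cache loader added below reads pipeline keys straight out of a
// binary stream with file.read(reinterpret_cast<char*>(&key), sizeof(key)).
// A minimal sketch of that pattern with a hypothetical Key type; it is safe
// only because the key is trivially copyable with unique object
// representations, which is what the static_asserts on the real key types
// guarantee.
#include <cstdint>
#include <fstream>
#include <type_traits>

struct Key {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size;
};
static_assert(std::is_trivially_copyable_v<Key>);
static_assert(std::has_unique_object_representations_v<Key>);

// Returns false on a short or failed read; the caller stops loading the cache.
bool ReadKey(std::ifstream& file, Key& key) {
    file.read(reinterpret_cast<char*>(&key), sizeof(key));
    return file.good();
}

// The serializer side writes the same raw bytes, so the on-disk format is the
// in-memory layout of the key (and is therefore invalidated by a cache
// version bump whenever the key struct changes).
bool WriteKey(std::ofstream& file, const Key& key) {
    file.write(reinterpret_cast<const char*>(&key), sizeof(key));
    return file.good();
}
// ------------------------------------------------------------------------------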
#include +#include #include #include -#include #include #include -#include #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -40,6 +42,8 @@ using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; template @@ -154,8 +158,6 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { - UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); - Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -282,6 +284,89 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "new_opengl"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + + struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; + }; + Common::StatefulThreadWorker workers( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); + + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard 
lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); +} + GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; @@ -332,7 +417,18 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (shader_cache_filename.empty()) { + return pipeline; + } + boost::container::static_vector env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); + } + } + VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( @@ -396,7 +492,12 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputePipeline(main_pools, key, env, true); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!shader_cache_filename.empty()) { + VideoCommon::SerializePipeline(key, std::array{&env}, + shader_cache_filename); + } + return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a56559ea9..16175318b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include #include @@ -23,10 +25,6 @@ namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - namespace OpenGL { class Device; @@ -55,6 +53,9 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); [[nodiscard]] ComputePipeline* CurrentComputePipeline(); @@ -88,6 +89,7 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + std::filesystem::path shader_cache_filename; }; } // namespace OpenGL From 8763cc1ff7909cb72902db7988f9e205dd0953d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 03:34:59 -0300 Subject: [PATCH 483/770] glasm: Fix global memory callbacks --- .../backend/glasm/emit_glasm_memory.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 33af83212..26b03587e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -37,8 +37,8 @@ void GlobalStorageOp(EmitContext& ctx, Register 
address, std::string_view then_e continue; } const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; - ctx.Add("LDC.U64 DC.x,c[{}];" // ssbo_addr - "LDC.U32 RC.x,c[{}];" // ssbo_size_u32 + ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr + "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 @@ -48,9 +48,10 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr "PK64.U DC.y,c[{}];" // host_ssbo = cbuf "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset - "{}", - "ELSE;", index, index, ssbo.cbuf_offset, ssbo.cbuf_offset + 8, address, address, - address, index, then_expr); + "{}" + "ELSE;", + ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, + address, address, index, then_expr); } if (!else_expr.empty()) { ctx.Add("{}", else_expr); From b9c8814ea978d2192522f9c85c07c00280bc1696 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 15:40:16 -0300 Subject: [PATCH 484/770] glasm: Implement undef instructions --- .../backend/glasm/emit_glasm_instructions.h | 10 +++++----- .../glasm/emit_glasm_not_implemented.cpp | 20 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index cc7aa8e20..c24c7a71d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -74,11 +74,11 @@ void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); -void EmitUndefU1(EmitContext& ctx); -void EmitUndefU8(EmitContext& ctx); -void EmitUndefU16(EmitContext& ctx); -void EmitUndefU32(EmitContext& ctx); -void EmitUndefU64(EmitContext& ctx); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index e9d1e0d6b..c76b45b8b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -209,24 +209,24 @@ void EmitYDirection(EmitContext& ctx) { NotImplemented(); } -void EmitUndefU1(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); } -void EmitUndefU8(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); } -void EmitUndefU16(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;", inst); } -void 
EmitUndefU32(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.Add("MOV.S {}.x,0;\n", inst); } -void EmitUndefU64(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.LongAdd("MOV.S64 {}.x,0;", inst); } void EmitGetZeroFromOp(EmitContext& ctx) { From 586c785366307cb3c648bd33345b431b8312612d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 15:41:53 -0300 Subject: [PATCH 485/770] glasm: Skip phi moves on undefined instructions --- .../backend/glasm/emit_glasm_not_implemented.cpp | 4 +++- src/shader_recompiler/frontend/ir/value.h | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index c76b45b8b..022b6584c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -58,7 +58,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; const Value eval_value{ctx.reg_alloc.Consume(value)}; - + if (!value.IsImmediate() && IR::IsUndef(RegAlloc::AliasInst(*value.Inst()))) { + return; + } if (phi_reg == eval_value) { return; } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 0c6bf684d..090cc1739 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -395,4 +395,17 @@ inline f64 Value::F64() const { return inst.GetOpcode() == Opcode::Phi; } +[[nodiscard]] inline bool IsUndef(const Inst& inst) { + switch (inst.GetOpcode()) { + case Opcode::UndefU1: + case Opcode::UndefU8: + case Opcode::UndefU16: + case Opcode::UndefU32: + case Opcode::UndefU64: + return true; + default: + return false; + } +} + } // namespace Shader::IR From f58f79c85dad7ad018a015cc6913f2789540ec22 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 16:00:36 -0300 Subject: [PATCH 486/770] glasm: Implement Y direction --- src/shader_recompiler/backend/glasm/emit_context.h | 2 ++ src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +++ .../backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 5 +++-- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 057d74790..cd4213cb7 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -70,6 +70,8 @@ public: Stage stage{}; std::string_view stage_name = "invalid"; std::string_view attrib_name = "invalid"; + + bool uses_y_direction{}; }; } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4f838b699..2a0524609 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -439,6 +439,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I "MOV.F FSWZB[2],1;" "MOV.F FSWZB[3],-1;"; } + if (ctx.uses_y_direction) { + header += "PARAM y_direction[1]={state.material.front.ambient};"; + } ctx.code.insert(0, header); ctx.code += "END"; return ctx.code; diff --git 
a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c24c7a71d..7b0f75405 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -71,7 +71,7 @@ void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); -void EmitYDirection(EmitContext& ctx); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 022b6584c..2a92d9df5 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -207,8 +207,9 @@ void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); } -void EmitYDirection(EmitContext& ctx) { - NotImplemented(); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); } void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { From adb591a757ccb289634920d51cf519b515ca32b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:32:59 -0300 Subject: [PATCH 487/770] glasm: Use storage buffers instead of global memory when possible --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/glasm/emit_context.cpp | 13 +- .../backend/glasm/emit_glasm.h | 5 +- .../backend/glasm/emit_glasm_atomic.cpp | 351 ---------------- .../backend/glasm/emit_glasm_memory.cpp | 380 +++++++++++++++++- src/shader_recompiler/profile.h | 3 + .../renderer_opengl/gl_buffer_cache.cpp | 26 +- .../renderer_opengl/gl_buffer_cache.h | 6 + .../renderer_opengl/gl_compute_pipeline.cpp | 42 +- .../renderer_opengl/gl_compute_pipeline.h | 12 +- src/video_core/renderer_opengl/gl_device.cpp | 18 +- src/video_core/renderer_opengl/gl_device.h | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 19 +- .../renderer_opengl/gl_graphics_pipeline.h | 12 +- .../renderer_opengl/gl_rasterizer.cpp | 13 +- .../renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_opengl/gl_shader_cache.cpp | 30 +- 17 files changed, 503 insertions(+), 437 deletions(-) delete mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index becdb7d54..d6d8e5f59 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC backend/glasm/emit_context.h backend/glasm/emit_glasm.cpp backend/glasm/emit_glasm.h - backend/glasm/emit_glasm_atomic.cpp backend/glasm/emit_glasm_barriers.cpp backend/glasm/emit_glasm_bitwise_conversion.cpp backend/glasm/emit_glasm_composite.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index b5b0e2204..e18526816 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -7,6 +7,7 
@@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } + u32 ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage buffer descriptor array"); } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } } - if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { - Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } } stage = program.stage; switch (program.stage) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3d02d873e..3df32a4a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM { [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); -[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { Bindings binding; - return EmitGLASM(profile, {}, program, binding); + return EmitGLASM(profile, runtime_info, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp deleted file mode 100644 index e72b252a3..000000000 --- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
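// --- Editor's note (illustrative sketch, not part of the patch series) --------
// The GLASM emitted by GlobalStorageOp in this patch bounds-checks a 64-bit
// global address against each SSBO's [ssbo_addr, ssbo_addr + size) range and,
// on a hit, rewrites it into a host pointer (or an ssbo[] offset). The same
// decision logic in plain C++, with hypothetical types; the real backend emits
// GLASM text that performs this per shader invocation rather than executing
// it on the CPU.
#include <cstdint>
#include <optional>
#include <vector>

struct SsboRange {
    std::uint64_t gpu_addr;  // base address as seen by the shader
    std::uint64_t size;      // length of the buffer in bytes
    std::uint64_t host_base; // where the buffer is bound on the host
};

std::optional<std::uint64_t> TranslateGlobalAddr(const std::vector<SsboRange>& ssbos,
                                                 std::uint64_t addr) {
    for (const SsboRange& ssbo : ssbos) {
        if (addr >= ssbo.gpu_addr && addr < ssbo.gpu_addr + ssbo.size) {
            return ssbo.host_base + (addr - ssbo.gpu_addr); // host_addr = base + offset
        }
    }
    return std::nullopt; // out of range: loads yield zero, stores are dropped
}
// ------------------------------------------------------------------------------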
- -#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLASM { -namespace { -void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, - std::string_view then_expr, std::string_view else_expr = {}) { - // Operate on bindless SSBO, call the expression with bounds checking - // address = c[binding].xy - // length = c[binding].z - const u32 sb_binding{binding.U32()}; - ctx.Add("PK64.U DC,c[{}];" // pointer = address - "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) - "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset - "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length - sb_binding, offset, offset, sb_binding); - if (else_expr.empty()) { - ctx.Add("IF NE.x;{}ENDIF;", then_expr); - } else { - ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); - } -} - -template -void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, - ValueType value, std::string_view operation, std::string_view size) { - const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, - fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); -} -} // namespace - -void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - Register value) { - 
ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U32"); -} - -void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, offset, value, "MIN", "S32"); -} - -void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U32"); -} - -void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S32"); -} - -void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U32"); -} - -void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); -} - -void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); -} - -void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "AND", "U32"); -} - -void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "OR", "U32"); -} - -void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U32"); -} - -void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); -} - -void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U64"); -} - -void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "S64"); -} - -void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U64"); -} - -void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S64"); -} - -void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U64"); -} - -void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "AND", "U64"); -} - -void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "OR", "U64"); -} - -void 
EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U64"); -} - -void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); -} - -void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarF32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F32"); -} - -void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); -} - -void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); -} - -void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); -} - -void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicXor32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void 
EmitGlobalAtomicUMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicXor64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMaxF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMaxF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} -} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 26b03587e..90dbb80d2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, } } -void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr, +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, std::string_view else_expr = {}) { const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; for (size_t index = 0; index < num_buffers; ++index) { @@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? 
-1 : 1 "AND.U.CC RC.x,RC.x,RC.y;" - "IF NE.x;" // a && b - "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr - "PK64.U DC.y,c[{}];" // host_ssbo = cbuf - "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset - "{}" - "ELSE;", + "IF NE.x;" // a && b + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, - address, address, index, then_expr); + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } } if (!else_expr.empty()) { ctx.Add("{}", else_expr); @@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e template void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, std::string_view size) { - StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } } template void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { - GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.S {},0;", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } } } // Anonymous namespace @@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o Write(ctx, binding, offset, value, "U32X4"); } +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 
pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, 
inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 
offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw 
NotImplementedException("GLASM instruction"); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c46452c3d..f8913bf14 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -111,7 +111,10 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; + // Static y negate value bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); - } else { - const GLuint base_binding = graphics_base_storage_bindings[stage]; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast(offset), static_cast(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else if (size == 0) {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
-    } else {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }

diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4986c65fd..bc16abafb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -147,6 +147,10 @@ public:
         image_handles = image_handles_;
     }

+    void SetEnableStorageBuffers(bool use_storage_buffers_) {
+        use_storage_buffers = use_storage_buffers_;
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -160,6 +164,8 @@ private:
     bool use_assembly_shaders = false;
     bool has_unified_vertex_buffers = false;

+    bool use_storage_buffers = false;
+
     u32 max_attributes = 0;

     std::array graphics_base_uniform_bindings{};

diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 700ebd8b8..5cf5f97a9 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -17,6 +17,15 @@ using VideoCommon::ImageId;
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 16;

+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+    u32 num{};
+    for (const auto& desc : range) {
+        num += desc.count;
+    }
+    return num;
+}
+
 size_t ComputePipelineKey::Hash() const noexcept {
     return static_cast<size_t>(
         Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
 }
@@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
     return std::memcmp(this, &rhs, sizeof *this) == 0;
 }

-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                                 Tegra::MemoryManager& gpu_memory_,
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+                                 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                  Tegra::Engines::KeplerCompute& kepler_compute_,
                                  ProgramManager& program_manager_, const Shader::Info& info_,
                                  OGLProgram source_program_, OGLAssemblyProgram assembly_program_)
     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
       kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_},
       source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} {
-    for (const auto& desc : info.texture_buffer_descriptors) {
-        num_texture_buffers += desc.count;
-    }
-    for (const auto& desc : info.image_buffer_descriptors) {
-        num_image_buffers += desc.count;
-    }
-    u32 num_textures = num_texture_buffers;
-    for (const auto& desc : info.texture_descriptors) {
-        num_textures += desc.count;
-    }
+
+    num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+    num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+    const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
     ASSERT(num_textures <= MAX_TEXTURES);
-    u32 num_images = num_image_buffers;
-    for (const auto& desc : info.image_descriptors) {
-        num_images += desc.count;
-    }
+
+    const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
     ASSERT(num_images <= MAX_IMAGES);
+
+    const bool
is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() { @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { buffer_cache.UpdateComputeBuffers(); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -28,6 +28,7 @@ struct Info; namespace OpenGL { +class Device; class ProgramManager; struct ComputePipelineKey { @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v); class ComputePipeline { public: - explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: TextureCache& texture_cache; BufferCache& buffer_cache; @@ -70,6 +75,9 @@ private: u32 num_texture_buffers{}; u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -135,13 +135,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -13,7 +13,6 @@ namespace OpenGL { class Device { public: explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; @@ -41,6 +40,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -124,6 +127,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; template -u32 AccumulateCount(Range&& range) { +u32 AccumulateCount(const Range& range) { u32 num{}; for (const auto& desc : range) { num += desc.count; @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu } u32 num_textures{}; u32 num_images{}; + u32 num_storage_buffers{}; for (size_t stage 
= 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; if (stage < 4) { @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu num_textures += AccumulateCount(info.texture_descriptors); num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - if (assembly_programs[0].handle != 0 && xfb_state) { + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } } @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,6 +20,7 @@ namespace OpenGL { +class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -77,6 +78,10 @@ public: } } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -99,6 +104,9 @@ private: std::array num_texture_buffers{}; std::array num_image_buffers{}; + bool use_storage_buffers{}; + bool writes_global_memory{}; + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { - ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; - if (!program) { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { return; } - program->Configure(); + pipeline->Configure(); const auto& 
qmd{kepler_compute.launch_description}; glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { // Make sure memory stored from the previous GL command stream is visible // This is only needed on assembly shaders where we write to GPU memory with raw pointers - // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used - // and prefer using NV_shader_storage_buffer_object when possible - if (Settings::values.use_assembly_shaders.GetValue()) { + if (has_written_global_memory) { + has_written_global_memory = false; glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); } glFlush(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,7 +225,8 @@ private: std::array image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + bool glasm_use_storage_buffers) { Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, info.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; return info; } @@ -435,7 +437,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); - size_t env_index{0}; + size_t env_index{}; + u32 total_storage_buffers{}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -447,7 +450,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } } + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + std::array infos{}; OGLProgram source_program; @@ -466,7 +476,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const auto runtime_info{MakeRuntimeInfo(key, program, 
glasm_use_storage_buffers)}; if (device.UseAssemblyShaders()) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); @@ -479,7 +489,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( LinkProgram(source_program.handle); } return std::make_unique( - texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } @@ -508,10 +518,18 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; + info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + OGLAssemblyProgram asm_program; OGLProgram source_program; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { const std::vector code{EmitSPIRV(profile, program)}; @@ -519,7 +537,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); } From 2aa30353b7990789a613fe22525e0a3a19a78656 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:33:34 -0300 Subject: [PATCH 488/770] glasm: Remove unintentional '\n' on Undef32 --- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 2a92d9df5..e6a880a36 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -225,7 +225,7 @@ void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { } void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { - ctx.Add("MOV.S {}.x,0;\n", inst); + ctx.Add("MOV.S {}.x,0;", inst); } void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { From 8f3043c3cf6f6baa1d235e6789533fbf567d1c2d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:42:37 -0300 Subject: [PATCH 489/770] Revert "glasm: Skip phi moves on undefined instructions" Causes regressions on Bowser's Fury. 
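The reverted fast path assumed it was safe to elide the MOV when a phi's incoming value is an
undefined instruction. The suspected failure mode (a sketch, not verified against a captured
Bowser's Fury shader): on the elided edge the phi's register is never written, so consumers
observe whatever earlier code or a previous loop iteration left behind. A self-contained C++
analogy, with every name invented for illustration:

    #include <cstdio>

    int main() {
        int phi_reg = 123; // leftover state in the register assigned to the phi
        for (int i = 0; i < 2; ++i) {
            if (i != 0) {
                phi_reg = 7; // move emitted for the defined incoming value
            }
            // Move elided for the "undefined" incoming value on i == 0: the
            // consumer observes the stale 123 instead of a freshly written value.
            std::printf("iteration %d sees %d\n", i, phi_reg);
        }
        return 0;
    }

Emitting the move unconditionally keeps the destination deterministic at the cost of a
redundant MOV.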
--- .../backend/glasm/emit_glasm_not_implemented.cpp | 4 +--- src/shader_recompiler/frontend/ir/value.h | 13 ------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index e6a880a36..95bcbd750 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -58,9 +58,7 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; const Value eval_value{ctx.reg_alloc.Consume(value)}; - if (!value.IsImmediate() && IR::IsUndef(RegAlloc::AliasInst(*value.Inst()))) { - return; - } + if (phi_reg == eval_value) { return; } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 090cc1739..0c6bf684d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -395,17 +395,4 @@ inline f64 Value::F64() const { return inst.GetOpcode() == Opcode::Phi; } -[[nodiscard]] inline bool IsUndef(const Inst& inst) { - switch (inst.GetOpcode()) { - case Opcode::UndefU1: - case Opcode::UndefU8: - case Opcode::UndefU16: - case Opcode::UndefU32: - case Opcode::UndefU64: - return true; - default: - return false; - } -} - } // namespace Shader::IR From e240a62017f240160896b617bb109a0bcea08516 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 21:24:30 -0300 Subject: [PATCH 490/770] glasm: Fix global memory fallbacks --- .../backend/glasm/emit_glasm_memory.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 90dbb80d2..cafb5c92a 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -38,14 +38,14 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std continue; } const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; - ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr - "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 - "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 - "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size - "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 - "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1 - "AND.U.CC RC.x,RC.x,RC.y;" - "IF NE.x;" // a && b + ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr + "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 + "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 + "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size + "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 + "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? 
-1 : 0 + "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b + "IF NE.x;" // if cond "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, address, address); @@ -65,7 +65,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std if (!else_expr.empty()) { ctx.Add("{}", else_expr); } - for (size_t index = 0; index < num_buffers; ++index) { + const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; + for (size_t index = 0; index < num_used_buffers; ++index) { ctx.Add("ENDIF;"); } } From 3b595fe8b28001eed4a936e2a7b465bd67dcc4b7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 16:47:49 -0300 Subject: [PATCH 491/770] glasm: Prepare XFB from state instead of global registers --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 19d85c482..38ec88b13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -362,13 +362,11 @@ void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs{maxwell3d.regs}; - GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; + const auto& layout = xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -383,7 +381,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = regs.tfb_varying_locs[feedback]; + const auto& locations = xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; From 83db7abae6550f89ba907789efdf818d93be1b5d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:49:29 -0300 Subject: [PATCH 492/770] glasm: Use integer lod for TXQ --- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 2 +- src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 34725b8c6..1c8c70661 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -529,7 +529,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarF32 lod) { + ScalarS32 lod) { const auto info{inst.Flags()}; const std::string texture{Texture(ctx, info, index)}; const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 7b0f75405..c9f4826ce 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ 
b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -546,7 +546,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              ScalarF32 lod);
+                              ScalarS32 lod);
 void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
 void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& coord, const IR::Value& derivatives,

From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp
Date: Thu, 27 May 2021 17:51:00 -0300
Subject: [PATCH 493/770] shader: Handle host exceptions

---
 src/shader_recompiler/exception.h             | 40 +++++++++++++----
 .../frontend/maxwell/opcodes.cpp              |  2 +-
 .../frontend/maxwell/program.cpp              |  1 +
 .../frontend/maxwell/translate/translate.cpp  | 13 ++++--
 .../renderer_opengl/gl_rasterizer.cpp         |  4 +-
 .../renderer_opengl/gl_shader_cache.cpp       | 43 +++++++++++--------
 .../renderer_opengl/gl_shader_cache.h         |  5 +--
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 35 ++++++++++-----
 8 files changed, 98 insertions(+), 45 deletions(-)

diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h
index 6fe620801..013d7b1bf 100644
--- a/src/shader_recompiler/exception.h
+++ b/src/shader_recompiler/exception.h
@@ -5,38 +5,62 @@
 #pragma once

 #include <exception>
+#include <string>
+#include <string_view>
 #include <utility>

 #include <fmt/format.h>

 namespace Shader {

-class LogicError : public std::logic_error {
+class Exception : public std::exception {
+public:
+    explicit Exception(std::string message_) noexcept : message{std::move(message_)} {}
+
+    const char* what() const noexcept override {
+        return message.c_str();
+    }
+
+    void Prepend(std::string_view prepend) {
+        message.insert(0, prepend);
+    }
+
+    void Append(std::string_view append) {
+        message += append;
+    }
+
+private:
+    std::string message;
+};
+
+class LogicError : public Exception {
 public:
     template <typename... Args>
     LogicError(const char* message, Args&&... args)
-        : std::logic_error{fmt::format(message, std::forward<Args>(args)...)} {}
+        : Exception{fmt::format(message, std::forward<Args>(args)...)} {}
 };

-class RuntimeError : public std::runtime_error {
+class RuntimeError : public Exception {
 public:
     template <typename... Args>
     RuntimeError(const char* message, Args&&... args)
-        : std::runtime_error{fmt::format(message, std::forward<Args>(args)...)} {}
+        : Exception{fmt::format(message, std::forward<Args>(args)...)} {}
 };

-class NotImplementedException : public std::logic_error {
+class NotImplementedException : public Exception {
 public:
     template <typename... Args>
     NotImplementedException(const char* message, Args&&... args)
-        : std::logic_error{fmt::format(message, std::forward<Args>(args)...)} {}
+        : Exception{fmt::format(message, std::forward<Args>(args)...)} {
+        Append(" is not implemented");
+    }
 };

-class InvalidArgument : public std::invalid_argument {
+class InvalidArgument : public Exception {
 public:
     template <typename... Args>
     InvalidArgument(const char* message, Args&&... args)
-        : std::invalid_argument{fmt::format(message, std::forward<Args>(args)...)} {}
+        : Exception{fmt::format(message, std::forward<Args>(args)...)} {}
 };

 } // namespace Shader

diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
index 12ddf2ac9..ccc40c20c 100644
--- a/src/shader_recompiler/frontend/maxwell/opcodes.cpp
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -10,7 +10,7 @@ namespace Shader::Maxwell {
 namespace {
 constexpr std::array NAME_TABLE{
-#define INST(name, cute, encode) #cute,
+#define INST(name, cute, encode) cute,
 #include "maxwell.inc"
 #undef INST
 };

diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index ccdab1dad..900fc7ab1 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -7,6 +7,7 @@
 #include
 #include

+#include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/post_order.h"
 #include "shader_recompiler/frontend/maxwell/program.h"

diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
index 0f4e7a251..8e3c4c5d5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -30,16 +30,21 @@ void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 locat
     TranslatorVisitor visitor{env, *block};
     for (Location pc = location_begin; pc != location_end; ++pc) {
         const u64 insn{env.ReadInstruction(pc.Offset())};
-        const Opcode opcode{Decode(insn)};
-        switch (opcode) {
+        try {
+            const Opcode opcode{Decode(insn)};
+            switch (opcode) {
 #define INST(name, cute, mask) \
     case Opcode::name: \
         Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
         break;
 #include "shader_recompiler/frontend/maxwell/maxwell.inc"
 #undef OPCODE
-        default:
-            throw LogicError("Invalid opcode {}", opcode);
+            default:
+                throw LogicError("Invalid opcode {}", opcode);
+            }
+        } catch (Exception& exception) {
+            exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
+            throw;
         }
     }
 }

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5d4e80364..54696d97d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -221,7 +221,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     SyncState();

     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
-
+    if (!pipeline) {
+        return;
+    }
     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
     pipeline->Configure(is_indexed);

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 287f497b5..7d2ec4efa 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -45,6 +45,7 @@ using VideoCommon::ComputeEnvironment;
 using VideoCommon::FileEnvironment;
 using VideoCommon::GenericEnvironment;
 using VideoCommon::GraphicsEnvironment;
+using VideoCommon::SerializePipeline;

 template <typename Container>
 auto MakeSpan(Container& container) {
@@ -327,10 +328,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
             workers.QueueWork(
                 [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; - + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -348,10 +350,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; - + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -419,8 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (shader_cache_filename.empty()) { + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + if (!pipeline || shader_cache_filename.empty()) { return pipeline; } boost::container::static_vector env_ptrs; @@ -429,13 +432,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, - bool build_in_parallel) { + ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -492,6 +495,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); + +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } std::unique_ptr ShaderCache::CreateComputePipeline( @@ -502,18 +509,17 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!shader_cache_filename.empty()) { - VideoCommon::SerializePipeline(key, std::array{&env}, - shader_cache_filename); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } + SerializePipeline(key, std::array{&env}, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel) { + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -540,6 +546,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16175318b..cf74d34e4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,15 +65,14 @@ private: std::unique_ptr CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs, bool build_in_parallel); + std::span envs); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); std::unique_ptr CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel); + Shader::Environment& env); Core::Frontend::EmuWindow& emu_window; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f86bf9c30..b6998e37c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -303,6 +303,9 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } @@ -362,9 +365,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, env, false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -405,7 +409,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs, bool 
build_in_parallel) { + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -458,6 +462,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { @@ -466,7 +474,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (pipeline_cache_filename.empty()) { + if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { @@ -477,7 +485,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -491,18 +499,19 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!pipeline_cache_filename.empty()) { - serialization_thread.QueueWork([this, key, env = std::move(env)] { - VideoCommon::SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); - }); + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); return pipeline; } std::unique_ptr PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, - bool build_in_parallel) { + bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -517,6 +526,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, thread_worker, program.info, std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan From 56d4a9ebde4afa18329ba6df4995ed9ef2aa1ca1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:53:27 -0300 Subject: [PATCH 494/770] texture_cache: Reduce invalid image/sampler error severity --- src/video_core/texture_cache/texture_cache.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 255b07cf8..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -548,13 +548,13 @@ void TextureCache
<P>::FillComputeImageViews(std::span<const u32> indices, template <class P> typename P::Sampler* TextureCache
<P>::GetGraphicsSampler(u32 index) { - [[unlikely]] if (index > graphics_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = graphics_sampler_table.Read(index); SamplerId& id = graphics_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -562,13 +562,13 @@ typename P::Sampler* TextureCache
<P>::GetGraphicsSampler(u32 index) { template <class P> typename P::Sampler* TextureCache
<P>::GetComputeSampler(u32 index) { - [[unlikely]] if (index > compute_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = compute_sampler_table.Read(index); SamplerId& id = compute_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -669,7 +669,7 @@ ImageViewId TextureCache
<P>
::VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index) { if (index > table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } const auto [descriptor, is_new] = table.Read(index); From b6c087496b14f1f5b253c3ecb82c00ded743418a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:54:08 -0300 Subject: [PATCH 495/770] glasm: Reduce reg allocation leaks from an exception to a log --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2a0524609..e23208d2c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -253,7 +253,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } if (!ctx.reg_alloc.IsEmpty()) { - throw LogicError("Register allocator is not empty"); + // LOG_WARNING ...; } } From 871c9f1cedadd29ad069a33d5ee9bd0c103015c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:03:49 -0300 Subject: [PATCH 496/770] shader/exception: Fix compilation errors on gcc --- src/shader_recompiler/exception.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h index 013d7b1bf..43f08162d 100644 --- a/src/shader_recompiler/exception.h +++ b/src/shader_recompiler/exception.h @@ -15,22 +15,22 @@ namespace Shader { class Exception : public std::exception { public: - explicit Exception(std::string message_) noexcept : message{std::move(message_)} {} + explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} - const char* what() const override { - return message.c_str(); + const char* what() const noexcept override { + return err_message.c_str(); } void Prepend(std::string_view prepend) { - message.insert(0, prepend); + err_message.insert(0, prepend); } void Append(std::string_view append) { - message += append; + err_message += append; } private: - std::string message; + std::string err_message; }; class LogicError : public Exception { From fbf5cdcba0f3b03e8cd3019fb285a96037b05f26 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:04:26 -0300 Subject: [PATCH 497/770] shader: Fix FSwizzleAdd folding when going through phi nodes --- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 66f1391db..796b4122d 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -440,13 +440,13 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { // DPdxFine if (index.U32() == 1) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{value_2})); + inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); } } else if (swizzle_value == 0xA5) { // DPdyFine if (index.U32() == 2) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{value_2})); + inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); } } } From b659212dbdcac6e4f54a4306fd716b7fb74505ad 
Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:59:22 -0300 Subject: [PATCH 498/770] shader: Fix TMML queries --- .../translate/impl/texture_mipmap_level.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 2277d24ff..abf87a0df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -84,9 +84,6 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - - IR::F32 transform_constant{v.ir.Imm32(256.0f)}; - const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; @@ -107,9 +104,16 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::F32 value{v.ir.CompositeExtract(sample, element)}; if (element < 2) { - value = v.ir.FPMul(value, transform_constant); + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); } - v.F(dest_reg, value); ++dest_reg; } } From dd39b87b0cfb640418278c55669f621bfd4addf5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 27 May 2021 23:29:22 -0400 Subject: [PATCH 499/770] shader: Adhere to disk shader cache setting --- src/yuzu/bootmanager.cpp | 13 +++++++------ src/yuzu_cmd/yuzu.cpp | 8 +++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index d72ca5acc..25b658b2a 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -64,12 +64,13 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), stop_token, - [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { - emit LoadProgress(stage, value, total); - }); - + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), stop_token, + [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { + emit LoadProgress(stage, value, total); + }); + } emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); gpu.ReleaseContext(); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index ac4ea88d3..35ce23696 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -218,9 +218,11 @@ int main(int argc, char** argv) { // Core is loaded, start the GPU (makes the GPU contexts current to this thread) system.GPU().Start(); - system.Renderer().ReadRasterizer()->LoadDiskResources( - system.CurrentProcess()->GetTitleID(), std::stop_token{}, - [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + if (Settings::values.use_disk_shader_cache.GetValue()) { + system.Renderer().ReadRasterizer()->LoadDiskResources( + system.CurrentProcess()->GetTitleID(), std::stop_token{}, + [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); + } void(system.Run()); while (emu_window->IsOpen()) { From 
627161c38e2ee986ea3cc9c7e46e09f54390d701 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 May 2021 01:34:27 -0300 Subject: [PATCH 500/770] shader: Fix secondary textures --- src/shader_recompiler/ir_opt/texture_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 9e0a2fb09..76cab04c2 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -283,8 +283,8 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { } TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.index : cbuf.secondary_index}; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.offset : cbuf.secondary_offset}; + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index }; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset }; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; return env.ReadTextureType(lhs_raw | rhs_raw); From 457dda69ccdec4002e794ad1a810b577af66f79f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 May 2021 01:36:25 -0300 Subject: [PATCH 501/770] shader: Clang-format secondary textures --- src/shader_recompiler/ir_opt/texture_pass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 76cab04c2..e9098239d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -283,8 +283,8 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { } TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { - const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index }; - const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset }; + const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; + const u32 secondary_offset{cbuf.has_secondary ? 
cbuf.secondary_offset : cbuf.offset}; const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; return env.ReadTextureType(lhs_raw | rhs_raw); From eb8464cb3d3cd4a3b5a03d8194ec7b34d8becd75 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 28 May 2021 20:47:31 -0300 Subject: [PATCH 502/770] glasm: Fix immediate texture coordinate --- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 1c8c70661..81d5fe72c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -161,6 +161,7 @@ std::string GradOffset(const IR::Value& offset) { std::pair Coord(EmitContext& ctx, const IR::Value& coord) { if (coord.IsImmediate()) { ScopedRegister scoped_reg(ctx.reg_alloc); + ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; } std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; From d738ad4d0ba02be5603712b3f615d4794a71df9c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:49:40 -0300 Subject: [PATCH 503/770] spirv: Fix image and image buffer descriptor index usage --- .../backend/spirv/emit_spirv_image.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 99b883746..cf842e1e0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -162,8 +162,10 @@ Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR } } -Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, - [[maybe_unused]] const IR::Value& index) { +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { + if (!index.IsImmediate() || index.U32() != 0) { + throw NotImplementedException("Indirect image indexing"); + } if (info.type == TextureType::Buffer) { const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; if (def.count > 1) { @@ -182,14 +184,14 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, } Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { - if (!index.IsImmediate()) { + if (!index.IsImmediate() || index.U32() != 0) { throw NotImplementedException("Indirect image indexing"); } if (info.type == TextureType::Buffer) { - const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; return ctx.OpLoad(def.image_type, def.id); } else { - const ImageDefinition def{ctx.images.at(index.U32())}; + const ImageDefinition def{ctx.images.at(info.descriptor_index)}; return ctx.OpLoad(def.image_type, def.id); } } From d093522fac5f3f4c2c27d30c9ad93421460792a0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:51:32 -0300 Subject: [PATCH 504/770] shader: Fix ImageWrite indexing --- .../frontend/maxwell/translate/impl/surface_load_store.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp 
b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index e1b8aa8ad..7dc793ad7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -145,7 +145,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::ARRAY_2D: return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); case Type::_3D: - return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); } throw NotImplementedException("Invalid type {}", type); } From 99f2c31b64aa7854690368f4637ef59a546b2d15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:53:39 -0300 Subject: [PATCH 505/770] vulkan_device: Enable float64 and int64 conditionally Add Intel Xe support. --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- src/video_core/vulkan_common/vulkan_device.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e27a2b51e..aabcb0b10 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -251,8 +251,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64 = true, - .shaderInt64 = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, @@ -909,6 +909,8 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ebe073293..693419505 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -314,6 +314,8 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. From 329dea217d05a47ee00bb005eba1f0fc6b3dd0f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 19:58:36 -0300 Subject: [PATCH 506/770] shader: Always initialize up reference in structure control flow Fixes ubsan issue. 
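A minimal sketch of the bug class this fixes, using a deliberately simplified Statement (the real type in structured_control_flow.cpp is a tagged union, so the shape below is illustrative rather than the project's code): constructors that never assign the up member leave it indeterminate, and any later read of it, even a null comparison, is undefined behavior.

    struct Statement {
        explicit Statement(int id_) : id{id_} {}                  // pre-patch shape: up never assigned
        Statement(int id_, Statement* up_) : id{id_}, up{up_} {}  // post-patch shape: up always assigned
        int id;
        Statement* up;  // parent link that the tree-building passes later walk
    };

    void Walk(const Statement& stmt) {
        // For an object built with the first constructor, stmt.up holds an
        // indeterminate value here, so even this comparison is undefined behavior.
        if (stmt.up != nullptr) {
            Walk(*stmt.up);
        }
    }

Threading an up_ argument through every constructor, as this patch does, guarantees the member is written before anything can read it.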
--- .../maxwell/structured_control_flow.cpp | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index ebe5c2654..c1e0646e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -101,22 +101,24 @@ struct Statement : ListBaseHook { : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} - Statement(Return) : type{StatementType::Return} {} - Statement(Kill) : type{StatementType::Kill} {} - Statement(Unreachable) : type{StatementType::Unreachable} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} - Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} - Statement(Or, Statement* op_a_, Statement* op_b_) - : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} - Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) : branch_offset{branch_offset_}, - branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} - Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} - Statement(IndirectBranchCond, u32 location_) - : location{location_}, type{StatementType::IndirectBranchCond} {} + branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_, Statement* up_) + : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -385,7 +387,7 @@ private: void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, std::vector& gotos, Node function_insert_point, std::optional return_label) { - Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; Tree& root{root_stmt.children}; std::unordered_map local_labels; local_labels.reserve(function.blocks.size()); @@ -411,7 +413,8 @@ private: switch (block.end_class) { case Flow::EndClass::Branch: { - Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; if (block.cond == IR::Condition{true}) { const Node 
true_label{local_labels.at(block.branch_true)}; gotos.push_back( @@ -423,7 +426,7 @@ private: } else { const Node true_label{local_labels.at(block.branch_true)}; const Node false_label{local_labels.at(block.branch_false)}; - Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; gotos.push_back( root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); gotos.push_back(root.insert( @@ -433,14 +436,15 @@ private: } case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, - block.branch_offset)); + block.branch_offset, &root_stmt)); for (const Flow::IndirectBranch& indirect : block.indirect_branches) { const Node indirect_label{local_labels.at(indirect.block)}; - Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; + Statement* cond{ + pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } - root.insert(ip, *pool.Create(Unreachable{})); + root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; @@ -449,16 +453,16 @@ private: break; } case Flow::EndClass::Exit: - root.insert(ip, *pool.Create(Return{})); + root.insert(ip, *pool.Create(Return{}, &root_stmt)); break; case Flow::EndClass::Return: { - Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); break; } case Flow::EndClass::Kill: - root.insert(ip, *pool.Create(Kill{})); + root.insert(ip, *pool.Create(Kill{}, &root_stmt)); break; } } @@ -474,7 +478,7 @@ private: Tree& body{goto_stmt->up->children}; Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); - Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); @@ -516,8 +520,8 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const neg_var{pool.Create(Not{}, variable)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; if (!if_body.empty()) { Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; UpdateTreeUp(if_stmt); @@ -528,7 +532,8 @@ private: switch (label_nested_stmt->type) { case StatementType::If: // Update nested if condition - label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); break; case StatementType::Loop: break; @@ -550,7 +555,7 @@ private: Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); SanitizeNoBreaks(loop_body); - Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const 
loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); body.insert(goto_stmt, *loop_stmt); @@ -577,15 +582,15 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); if_body.pop_front(); - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); body.erase(goto_stmt); - Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; Tree& parent_tree{parent->up->children}; return parent_tree.insert(std::next(parent), *new_goto); @@ -597,14 +602,14 @@ private: const u32 label_id{goto_stmt->label->id}; Statement* const goto_cond{goto_stmt->cond}; Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; body.insert(goto_stmt, *set_goto_var); body.insert(goto_stmt, *break_stmt); body.erase(goto_stmt); const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; - Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; Tree& parent_tree{loop->up->children}; return parent_tree.insert(std::next(loop), *new_goto); From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: [PATCH 507/770] buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/buffer_cache/buffer_cache.h | 10 +++++----- .../renderer_opengl/gl_graphics_pipeline.cpp | 4 +++- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 +++ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c92e4c30..d6b9eb99f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,7 +142,7 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetEnabledUniformBuffers(const std::array& mask); void SetEnabledComputeUniformBuffers(u32 enabled); @@ -670,13 +670,13 @@ void BufferCache
<P>::BindHostComputeBuffers() { } template <class P> -void BufferCache
<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache
<P>
::SetEnabledUniformBuffers(const std::array& mask) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffers != mask) { + dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffers = mask; } template diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 38ec88b13..976897067 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -100,6 +100,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + enabled_uniform_buffers[stage] = info.constant_buffer_mask; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; @@ -145,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); @@ -153,7 +156,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index c1113e180..bf33ce604 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,6 +99,7 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; + std::array enabled_uniform_buffers{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e5f54a84f..dfe6e6a80 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,6 +218,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { + return info ? 
info->constant_buffer_mask : 0; + }); auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -259,11 +262,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e362d13c5..4068a0edc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -128,7 +128,10 @@ private: std::vector transitions; std::array spv_modules; + std::array stage_infos; + std::array enabled_uniform_buffers{}; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; vk::PipelineLayout pipeline_layout; From 916ca7432474e891864524dcbc6c879d5cdbfb72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 03:40:19 -0300 Subject: [PATCH 508/770] opengl: Declare fragment outputs even if they are not used Fixes Ori and the Blind Forest's menu on GLASM. For some reason (probably high level optimizations) it is not sanitized on SPIR-V for OpenGL. Vulkan is unaffected by this change. --- src/shader_recompiler/backend/glasm/emit_context.cpp | 10 +++------- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +-- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/profile.h | 4 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++++ 6 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e18526816..08918a5c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -117,13 +117,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { - if (!info.stores_frag_color[index]) { - continue; - } - if (index == 0) { - Add("OUTPUT frag_color0=result.color;"); - } else { + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index e23208d2c..70ca6f621 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -298,8 +298,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } - const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; - if (std::ranges::find(non_zero_frag_colors, true) != 
non_zero_frag_colors.end()) { + if (stage == Stage::Fragment) { header += "OPTION ARB_draw_buffers;"; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3e8899f53..7c618125e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1320,7 +1320,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { break; case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { - if (!info.stores_frag_color[index]) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { continue; } frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f8913bf14..f059e3b26 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -84,7 +84,11 @@ struct Profile { bool support_int64_atomics{}; bool warp_size_potentially_larger_than_guest{}; + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. + bool need_declared_frag_colors{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7d2ec4efa..6ea7c0ee8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -276,7 +276,9 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64_atomics = false, .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6998e37c..cec51cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,9 +274,16 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .ignore_nan_fp_comparisons = false, }; } From c44b16124fcfb64b9482d639ae55670005eb6307 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:42:42 -0300 Subject: [PATCH 509/770] vk_buffer_cache: Add transform feedback usage to buffers --- .../renderer_vulkan/vk_buffer_cache.cpp | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 568993c58..2da3de6de 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,6 +60,27 @@ std::array MakeQuadIndices(u32 quad, u32 first) { } 
return indices; } + +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -68,21 +89,7 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, - buffer{device->GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - })}, + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); From 77372443c3d6b20d7f78366bb4aa162f22bd7cde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:43:47 -0300 Subject: [PATCH 510/770] vulkan: Enable depth bounds and use it conditionally Intel devices pre-Xe don't support this. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dfe6e6a80..d381109d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -598,13 +598,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthCompareOp = dynamic.depth_test_enable ? 
MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } static_vector cb_attachments; const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ef14e91e7..9611b480a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -682,6 +682,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aabcb0b10..0a42efb6a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -226,7 +226,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthClamp = true, .depthBiasClamp = true, .fillModeNonSolid = true, - .depthBounds = false, + .depthBounds = is_depth_bounds_supported, .wideLines = false, .largePoints = true, .alphaToOne = false, @@ -908,6 +908,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 693419505..1ab63ecd7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -314,6 +319,7 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. 
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. From 1148a4eac715869077ace56a9a311a167643aca3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:44:28 -0300 Subject: [PATCH 511/770] vulkan: Conditionally use shaderInt16 Add support for Polaris AMD devices. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cec51cc77..2a2f166c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,7 +249,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = true, - .support_int16 = true, + .support_int16 = device.IsShaderInt16Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0a42efb6a..2b715baba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -253,7 +253,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderCullDistance = true, .shaderFloat64 = is_shader_float64_supported, .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = true, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -912,6 +912,7 @@ void Device::SetupFeatures() { is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1ab63ecd7..9bc1fb947 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { return is_depth_bounds_supported; @@ -322,6 +327,7 @@ private: bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 
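The three device-feature patches above (505, 510 and 511) share one pattern: query what the physical device supports once, cache the result, and only request supported features at device creation. Below is a hedged sketch of that pattern against the raw Vulkan API rather than yuzu's vk::PhysicalDevice wrapper; FeatureSupport, QuerySupport and EnabledFeatures are illustrative names, not yuzu code.

    #include <vulkan/vulkan.h>

    struct FeatureSupport {
        bool depth_bounds{};
        bool shader_float64{};
        bool shader_int64{};
        bool shader_int16{};
    };

    FeatureSupport QuerySupport(VkPhysicalDevice physical) {
        VkPhysicalDeviceFeatures features{};
        vkGetPhysicalDeviceFeatures(physical, &features);
        return {
            .depth_bounds = features.depthBounds == VK_TRUE,
            .shader_float64 = features.shaderFloat64 == VK_TRUE,
            .shader_int64 = features.shaderInt64 == VK_TRUE,
            .shader_int16 = features.shaderInt16 == VK_TRUE,
        };
    }

    VkPhysicalDeviceFeatures EnabledFeatures(const FeatureSupport& support) {
        // vkCreateDevice fails with VK_ERROR_FEATURE_NOT_PRESENT when a requested
        // feature is unsupported, which is why hard-coded VK_TRUE values broke
        // pre-Xe Intel (depth bounds) and Polaris AMD (shaderInt16) devices.
        VkPhysicalDeviceFeatures features{};
        features.depthBounds = support.depth_bounds ? VK_TRUE : VK_FALSE;
        features.shaderFloat64 = support.shader_float64 ? VK_TRUE : VK_FALSE;
        features.shaderInt64 = support.shader_int64 ? VK_TRUE : VK_FALSE;
        features.shaderInt16 = support.shader_int16 ? VK_TRUE : VK_FALSE;
        return features;
    }

Consumers then branch on the cached flags at runtime, for example forcing the depth bounds enable to false as the vk_rasterizer hunk above does, which is what the IsDepthBoundsSupported() and IsShaderInt16Supported() getters expose.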
From adc43297c5c06adc101f2ec2198c93cbff2654e5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:45:05 -0300 Subject: [PATCH 512/770] spirv: Fix output generics with components --- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7c618125e..007b79650 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -188,7 +188,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo .first_element = element, .num_components = num_components, }; - std::fill_n(ctx.output_generics[index].begin(), num_components, info); + std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); element += num_components; } } From 5d170de0b5c57afdfc7c633c0b3b36d7ea9299c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:46:07 -0300 Subject: [PATCH 513/770] shader: Implement ISCADD32I --- .../translate/impl/integer_scaled_add.cpp | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 93cc2c0b1..044671943 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -8,40 +8,36 @@ namespace Shader::Maxwell { namespace { -void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, + u64 scale_imm) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> op_a; - BitField<47, 1, u64> cc; - BitField<48, 2, u64> three_for_po; - BitField<48, 1, u64> neg_b; - BitField<49, 1, u64> neg_a; - BitField<39, 5, u64> scale; } const iscadd{insn}; - const bool po{iscadd.three_for_po == 3}; + const bool po{neg_a && neg_b}; IR::U32 op_a{v.X(iscadd.op_a)}; - if (!po) { - // When PO is not present, the bits are interpreted as negation - if (iscadd.neg_a != 0) { - op_a = v.ir.INeg(op_a); - } - if (iscadd.neg_b != 0) { - op_b = v.ir.INeg(op_b); - } - } else { + if (po) { // When PO is present, add one op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { + // When PO is not present, the bits are interpreted as negation + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + if (neg_b) { + op_b = v.ir.INeg(op_b); + } } // With the operands already processed, scale A - const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; + const IR::U32 scale{v.ir.Imm32(static_cast(scale_imm))}; const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; v.X(iscadd.dest_reg, result); - if (iscadd.cc != 0) { + if (cc) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); const IR::U1 carry{v.ir.GetCarryFromOp(result)}; @@ -51,6 +47,18 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + } // Anonymous namespace void 
TranslatorVisitor::ISCADD_reg(u64 insn) { @@ -65,8 +73,14 @@ void TranslatorVisitor::ISCADD_imm(u64 insn) { ISCADD(*this, insn, GetImm20(insn)); } -void TranslatorVisitor::ISCADD32I(u64) { - throw NotImplementedException("ISCADD32I"); +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); } } // namespace Shader::Maxwell From 05d41fa9b70af6d469f2f6f1474436c9255e9bc3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 23:08:17 -0300 Subject: [PATCH 514/770] shader: Add support for "negative" and unaligned offsets "Negative" offsets don't exist. They are shown as such due to a bug in nvdisasm. Unaligned offsets have been proved to read the aligned offset. For example, when reading an U32, if the offset is 6, the offset read will be 4. --- .../backend/glasm/emit_glasm_context_get_set.cpp | 8 ++++++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 9 +++------ .../frontend/maxwell/translate/impl/impl.cpp | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 787612def..9ad668b86 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -18,6 +18,14 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU throw NotImplementedException("Indirect constant buffer loading"); } const Register ret{ctx.reg_alloc.Define(inst)}; + if (offset.type == Type::U32) { + // Avoid reading arrays out of bounds, matching hardware's behavior + const u32 imm_offset{offset.imm_u32}; + if (offset.imm_u32 >= 0x10'000) { + ctx.Add("MOV.S {},0;", ret); + return; + } + } ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index c1b69c234..442a958a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -122,7 +122,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, - const IR::Value& binding, const IR::Value& offset, bool check_alignment = true) { + const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -138,17 +138,14 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; return ctx.OpLoad(result_type, access_chain); } - if (check_alignment && offset.U32() % element_size != 0) { - throw NotImplementedException("Unaligned immediate constant buffer load"); - } + // Hardware been proved to read the aligned offset (e.g. 
LDC.U32 at 6 will read offset 4) const Id imm_offset{ctx.Const(offset.U32() / element_size)}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; return ctx.OpLoad(result_type, access_chain); } Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset, - false); + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); } Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 88bbac0a5..b446aae0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -122,14 +122,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { static std::pair CbufAddr(u64 insn) { union { u64 raw; - BitField<20, 14, s64> offset; + BitField<20, 14, u64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } - if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { + if (cbuf.offset >= 0x10'000) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } const IR::Value binding{static_cast(cbuf.binding)}; From ac0f5d2ab6cf0843c9ac6179b52b1e069a78069e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 31 May 2021 04:19:31 -0300 Subject: [PATCH 515/770] shader: Track legacy varyings --- .../ir_opt/collect_shader_info_pass.cpp | 101 +++++++++++++++--- src/shader_recompiler/shader_info.h | 21 +++- 2 files changed, 105 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ea08aacc3..b343f0429 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -43,14 +43,11 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.loads_position = true; break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + info.loads_color_front_diffuse = true; break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: @@ -60,6 +57,15 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::TessellationEvaluationPointV: info.loads_tess_coord = true; break; + case IR::Attribute::InstanceId: + info.loads_instance_id = true; + break; + case IR::Attribute::VertexId: + info.loads_vertex_id = true; + break; + case IR::Attribute::FrontFace: + info.loads_front_face = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -71,6 +77,12 @@ void SetAttribute(Info& info, IR::Attribute attribute) { return; } switch (attribute) { + case IR::Attribute::Layer: + info.stores_layer = true; + break; + case IR::Attribute::ViewportIndex: + info.stores_viewport_index = true; + break; case IR::Attribute::PointSize: 
info.stores_point_size = true; break; @@ -80,6 +92,72 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.stores_position = true; break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + info.stores_color_front_diffuse = true; + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + info.stores_color_front_specular = true; + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + info.stores_color_back_diffuse = true; + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + info.stores_color_front_specular = true; + break; + case IR::Attribute::FixedFncTexture0S: + case IR::Attribute::FixedFncTexture0T: + case IR::Attribute::FixedFncTexture0R: + case IR::Attribute::FixedFncTexture0Q: + case IR::Attribute::FixedFncTexture1S: + case IR::Attribute::FixedFncTexture1T: + case IR::Attribute::FixedFncTexture1R: + case IR::Attribute::FixedFncTexture1Q: + case IR::Attribute::FixedFncTexture2S: + case IR::Attribute::FixedFncTexture2T: + case IR::Attribute::FixedFncTexture2R: + case IR::Attribute::FixedFncTexture2Q: + case IR::Attribute::FixedFncTexture3S: + case IR::Attribute::FixedFncTexture3T: + case IR::Attribute::FixedFncTexture3R: + case IR::Attribute::FixedFncTexture3Q: + case IR::Attribute::FixedFncTexture4S: + case IR::Attribute::FixedFncTexture4T: + case IR::Attribute::FixedFncTexture4R: + case IR::Attribute::FixedFncTexture4Q: + case IR::Attribute::FixedFncTexture5S: + case IR::Attribute::FixedFncTexture5T: + case IR::Attribute::FixedFncTexture5R: + case IR::Attribute::FixedFncTexture5Q: + case IR::Attribute::FixedFncTexture6S: + case IR::Attribute::FixedFncTexture6T: + case IR::Attribute::FixedFncTexture6R: + case IR::Attribute::FixedFncTexture6Q: + case IR::Attribute::FixedFncTexture7S: + case IR::Attribute::FixedFncTexture7T: + case IR::Attribute::FixedFncTexture7R: + case IR::Attribute::FixedFncTexture7Q: + case IR::Attribute::FixedFncTexture8S: + case IR::Attribute::FixedFncTexture8T: + case IR::Attribute::FixedFncTexture8R: + case IR::Attribute::FixedFncTexture8Q: + case IR::Attribute::FixedFncTexture9S: + case IR::Attribute::FixedFncTexture9T: + case IR::Attribute::FixedFncTexture9R: + case IR::Attribute::FixedFncTexture9Q: + info.stores_fixed_fnc_textures = true; + break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -90,11 +168,8 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; + case IR::Attribute::FogCoordinate: + info.stores_fog_coordinate = true; break; case IR::Attribute::ViewportMask: info.stores_viewport_mask = true; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d6c32fbe5..b60ba0457 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -122,26 +122,39 @@ struct Info { std::array 
input_generics{}; bool loads_primitive_id{}; bool loads_position{}; + bool loads_color_front_diffuse{}; + bool loads_point_coord{}; bool loads_instance_id{}; bool loads_vertex_id{}; bool loads_front_face{}; - bool loads_point_coord{}; + bool loads_tess_coord{}; + bool loads_indexed_attributes{}; std::array stores_frag_color{}; bool stores_sample_mask{}; bool stores_frag_depth{}; + std::array stores_generics{}; - bool stores_position{}; - bool stores_point_size{}; - bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_point_size{}; + bool stores_position{}; + bool stores_color_front_diffuse{}; + bool stores_color_front_specular{}; + bool stores_color_back_diffuse{}; + bool stores_color_back_specular{}; + bool stores_fixed_fnc_textures{}; + bool stores_clip_distance{}; + bool stores_fog_coordinate{}; bool stores_viewport_mask{}; + bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; + bool stores_indexed_attributes{}; + bool stores_global_memory{}; bool uses_fp16{}; From cf9f88e5a7b884a417c91725b5cb9e500792e206 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 31 May 2021 04:20:09 -0300 Subject: [PATCH 516/770] glasm: Implement legacy varyings --- .../glasm/emit_glasm_context_get_set.cpp | 87 ++++++++++++++----- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 9ad668b86..3236def25 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -90,14 +90,20 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); } break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); + break; case IR::Attribute::InstanceId: ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); break; @@ -121,28 +127,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); return; } - switch (attr) { - case IR::Attribute::PointSize: - ctx.Add("MOV.F result.pointsize.x,{};", value); - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - ctx.Add("MOV.F result.position.{},{};", swizzle, value); - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: { - const u32 index{static_cast(attr) - static_cast(IR::Attribute::ClipDistance0)}; - ctx.Add("MOV.F 
result.clip[{}].x,{};", index, value); - break; + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { + const u32 index{ + (static_cast(attr) - static_cast(IR::Attribute::FixedFncTexture0S)) / 4}; + ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); + return; } + switch (attr) { case IR::Attribute::Layer: if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.layer.x,{};", value); @@ -157,6 +148,54 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, // LOG_WARNING } break; + case IR::Attribute::PointSize: + ctx.Add("MOV.F result.pointsize.x,{};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("MOV.F result.position.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("MOV.F result.color.{},{};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("MOV.F result.fogcoord.x,{};", value); + break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast(attr) - static_cast(IR::Attribute::ClipDistance0)}; + ctx.Add("MOV.F result.clip[{}].x,{};", index, value); + break; + } default: throw NotImplementedException("Set attribute {}", attr); } From 5539b13c5a696de553be86bb7c4ae61a0cdcc754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 31 May 2021 05:34:53 -0300 Subject: [PATCH 517/770] shader,glasm: Implement legacy texcoord loads --- .../glasm/emit_glasm_context_get_set.cpp | 12 +++- .../ir_opt/collect_shader_info_pass.cpp | 70 +++++-------------- src/shader_recompiler/shader_info.h | 1 + 3 files changed, 29 insertions(+), 54 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 3236def25..3b73e8757 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -37,6 +37,10 @@ bool IsInputArray(Stage stage) { std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; } + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast(attr) - static_cast(IR::Attribute::FixedFncTexture0S)) / 4; +} } // Anonymous namespace void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { @@ -76,6 +80,11 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); return; } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); + return; + } switch (attr) { case IR::Attribute::PrimitiveId: ctx.Add("MOV.S {}.x,primitive.id;", inst); @@ -128,8 +137,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { - const u32 index{ - (static_cast(attr) - static_cast(IR::Attribute::FixedFncTexture0S)) / 4}; + const u32 index{TexCoordIndex(attr)}; ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); return; } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index b343f0429..6a5243c9f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -28,12 +28,16 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attribute) { - if (IR::IsGeneric(attribute)) { - info.input_generics.at(IR::GenericAttributeIndex(attribute)).used = true; +void GetAttribute(Info& info, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; return; } - switch (attribute) { + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + info.loads_fixed_fnc_textures = true; + return; + } + switch (attr) { case IR::Attribute::PrimitiveId: info.loads_primitive_id = true; break; @@ -67,16 +71,20 @@ void GetAttribute(Info& info, IR::Attribute attribute) { info.loads_front_face = true; break; default: - throw NotImplementedException("Get attribute {}", attribute); + throw NotImplementedException("Get attribute {}", attr); } } -void SetAttribute(Info& info, IR::Attribute attribute) { - if (IR::IsGeneric(attribute)) { - info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; +void SetAttribute(Info& info, IR::Attribute attr) { + if (IR::IsGeneric(attr)) { + info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; return; } - switch (attribute) { + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { + info.stores_fixed_fnc_textures = true; + return; + } + switch (attr) { case IR::Attribute::Layer: info.stores_layer = true; break; @@ -116,48 +124,6 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ColorBackSpecularA: info.stores_color_front_specular = true; break; - case IR::Attribute::FixedFncTexture0S: - case IR::Attribute::FixedFncTexture0T: - case IR::Attribute::FixedFncTexture0R: - case IR::Attribute::FixedFncTexture0Q: - case IR::Attribute::FixedFncTexture1S: - case IR::Attribute::FixedFncTexture1T: - case IR::Attribute::FixedFncTexture1R: - case IR::Attribute::FixedFncTexture1Q: - case 
IR::Attribute::FixedFncTexture2S: - case IR::Attribute::FixedFncTexture2T: - case IR::Attribute::FixedFncTexture2R: - case IR::Attribute::FixedFncTexture2Q: - case IR::Attribute::FixedFncTexture3S: - case IR::Attribute::FixedFncTexture3T: - case IR::Attribute::FixedFncTexture3R: - case IR::Attribute::FixedFncTexture3Q: - case IR::Attribute::FixedFncTexture4S: - case IR::Attribute::FixedFncTexture4T: - case IR::Attribute::FixedFncTexture4R: - case IR::Attribute::FixedFncTexture4Q: - case IR::Attribute::FixedFncTexture5S: - case IR::Attribute::FixedFncTexture5T: - case IR::Attribute::FixedFncTexture5R: - case IR::Attribute::FixedFncTexture5Q: - case IR::Attribute::FixedFncTexture6S: - case IR::Attribute::FixedFncTexture6T: - case IR::Attribute::FixedFncTexture6R: - case IR::Attribute::FixedFncTexture6Q: - case IR::Attribute::FixedFncTexture7S: - case IR::Attribute::FixedFncTexture7T: - case IR::Attribute::FixedFncTexture7R: - case IR::Attribute::FixedFncTexture7Q: - case IR::Attribute::FixedFncTexture8S: - case IR::Attribute::FixedFncTexture8T: - case IR::Attribute::FixedFncTexture8R: - case IR::Attribute::FixedFncTexture8Q: - case IR::Attribute::FixedFncTexture9S: - case IR::Attribute::FixedFncTexture9T: - case IR::Attribute::FixedFncTexture9R: - case IR::Attribute::FixedFncTexture9Q: - info.stores_fixed_fnc_textures = true; - break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: @@ -175,7 +141,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { info.stores_viewport_mask = true; break; default: - throw NotImplementedException("Set attribute {}", attribute); + throw NotImplementedException("Set attribute {}", attr); } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index b60ba0457..d5b2ca7bc 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -123,6 +123,7 @@ struct Info { bool loads_primitive_id{}; bool loads_position{}; bool loads_color_front_diffuse{}; + bool loads_fixed_fnc_textures{}; bool loads_point_coord{}; bool loads_instance_id{}; bool loads_vertex_id{}; From 22f0c4f002b1d215fe5accde7a25dfb724889ee8 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 31 May 2021 16:57:11 -0400 Subject: [PATCH 518/770] emit_glasm_context_get_set: Remove unused variable --- .../backend/glasm/emit_glasm_context_get_set.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 3b73e8757..c1df7a342 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -20,7 +20,6 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU const Register ret{ctx.reg_alloc.Define(inst)}; if (offset.type == Type::U32) { // Avoid reading arrays out of bounds, matching hardware's behavior - const u32 imm_offset{offset.imm_u32}; if (offset.imm_u32 >= 0x10'000) { ctx.Add("MOV.S {},0;", ret); return; From d26271b0148e4c41d87199b3e42a5702d1a6be53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 19:59:29 -0300 Subject: [PATCH 519/770] vk_swapchain: Avoid recreating the swapchain on each frame Recreate only when requested (or sRGB is changed) instead of tracking the frontend's size. That size is still used as a hint. 
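In rough terms, the new present path is a minimal sketch like this (names taken from the diff below; an illustration of the flow, not the literal committed code):

    // Recreate lazily: only when the swapchain flags itself stale or the sRGB
    // setting flips. The frontend layout is queried only at that point, and
    // only as a size hint for the new swapchain.
    if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) {
        scheduler.WaitWorker();
        const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
        swapchain.Create(layout.width, layout.height, is_srgb);
    }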
--- .../renderer_vulkan/renderer_vulkan.cpp | 18 ++++++------------ src/video_core/renderer_vulkan/vk_swapchain.h | 6 +++--- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d50647ba7..54c41bcaf 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -139,17 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); bool has_been_recreated = false; const auto recreate_swapchain = [&] { if (!has_been_recreated) { has_been_recreated = true; scheduler.WaitWorker(); } + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || - swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } bool needs_recreate; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index b38fd9dc2..df6da3d93 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -33,9 +33,9 @@ public: /// Presents the rendered image to the swapchain. void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. - bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { - return extent.width != width || extent.height != height || current_srgb != is_srgb; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; } /// Returns true when the image has to be recreated. 
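Note on the header change above: the per-frame test shrinks from a three-way extent/sRGB comparison to a single flag compare. Before and after, in sketch form (both signatures appear in this patch):

    // Before: recreate whenever the frontend layout drifted from the extent.
    //   swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)
    // After: the swapchain owns its extent; only the color space is compared.
    //   swapchain.HasColorSpaceChanged(is_srgb) // current_srgb != is_srgb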
From 46bd362d0dfab27e8c8d49f11eb4e3f373bf8f23 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 20:37:45 -0300 Subject: [PATCH 520/770] fixed_pipeline_state: Use regular for loop instead of ranges for perf MSVC generates better code for it. --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 3a43c329f..1486d088a 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,9 +84,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); - std::ranges::transform(regs.rt, color_formats.begin(), - [](const auto& rt) { return static_cast(rt.format); }); + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast(regs.rt[i].format); + } alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); From e57ee3b7fd8dd942b616d389635a4e9f00c596e9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 01:10:04 -0300 Subject: [PATCH 521/770] transform_feedback: Read buffer stride from index instead of layout --- src/video_core/transform_feedback.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index db52fff93..ba26ac3f1 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -72,8 +72,9 @@ std::vector MakeTransformFeedbackVaryings( const u32 base_offset = offset; const u8 location = locations[offset]; + UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream); Shader::TransformFeedbackVarying varying{ - .buffer = layout.stream, + .buffer = static_cast(buffer), .stride = layout.stride, .offset = offset * 4, .components = 1, From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: [PATCH 522/770] buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. 
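The mechanism, as a minimal sketch (names come from the collect_shader_info_pass.cpp and buffer_cache.h hunks below, except tracked_size, which stands in for the recorded constant_buffer_used_sizes entry; simplified):

    // Shader info pass: every GetCbuf* access raises a per-buffer high-water mark.
    u32& size = info.constant_buffer_used_sizes[index.U32()];
    if (offset.IsImmediate()) {
        size = std::max(size, offset.U32() + element_size); // highest byte touched
    } else {
        size = 0x10'000; // dynamic indexing: assume the whole 64 KiB window
    }
    // Bind time: the buffer cache then synchronizes only the tracked prefix.
    const u32 bound_size = std::min(binding.size, tracked_size);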
--- .../ir_opt/collect_shader_info_pass.cpp | 19 +++++++-- src/shader_recompiler/shader_info.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 42 ++++++++++++------- .../renderer_opengl/gl_compute_pipeline.cpp | 4 +- .../renderer_opengl/gl_compute_pipeline.h | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 21 +++++----- .../renderer_opengl/gl_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 ++- .../renderer_vulkan/vk_compute_pipeline.h | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- 11 files changed, 78 insertions(+), 38 deletions(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6a5243c9f..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -560,32 +560,45 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufU32x2: { - if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32(), 1); - } else { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { throw NotImplementedException("Constant buffer with non-immediate index"); } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; break; case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; break; case IR::Opcode::GetCbufU32: info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; break; case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= IR::Type::F32; + element_size = 4; break; case IR::Opcode::GetCbufU32x2: info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; break; default: break; } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = std::max(size, offset.U32() + element_size); + } else { + size = 0x10'000; + } break; } case IR::Opcode::BindlessImageSampleImplicitLod: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d5b2ca7bc..32f8a50ea 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -197,6 +197,7 @@ struct Info { IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; + std::array constant_buffer_used_sizes{}; u32 nvn_buffer_base{}; std::bitset<16> nvn_buffer_used{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array; template class BufferCache { @@ -142,9 +144,10 @@ 
public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(const std::array& mask); + void SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); @@ -384,8 +387,11 @@ private: std::array compute_storage_buffers; std::array compute_texture_buffers; - std::array enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + std::array enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array enabled_storage_buffers{}; std::array written_storage_buffers{}; @@ -670,18 +676,22 @@ void BufferCache
<P>
::BindHostComputeBuffers() { } template <class P> -void BufferCache
<P>
::SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask) { +void BufferCache
<P>
::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers != mask) { + if (enabled_uniform_buffer_masks != mask) { dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers = mask; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template <class P> -void BufferCache
<P>
::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache
<P>
::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template <class P> @@ -984,7 +994,7 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -998,7 +1008,7 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -1113,11 +1123,11 @@ void BufferCache
<P>
::BindHostComputeUniformBuffers() { dirty_uniform_buffers.fill(~u32{0}); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1261,7 +1271,7 @@ void BufferCache
<P>
::UpdateVertexBuffer(u32 index) { template <class P> void BufferCache
<P>
::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1334,7 +1344,7 @@ void BufferCache
<P>
::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache
<P>
::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 5cf5f97a9..61b6fe4b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -43,6 +43,8 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); num_image_buffers = AccumulateCount(info.image_buffer_descriptors); @@ -63,7 +65,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac } void ComputePipeline::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index dd6b62ef2..b5dfb65e9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -72,6 +72,7 @@ private: Shader::Info info; OGLProgram source_program; OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 976897067..a5d65fdca 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -60,6 +60,14 @@ std::pair TransformFeedbackEnum(u8 location) { UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -100,7 +108,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } - enabled_uniform_buffers[stage] = info.constant_buffer_mask; + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += 
num_tex_buffer_bindings; @@ -130,14 +139,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - void GraphicsPipeline::Configure(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; @@ -147,7 +148,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index bf33ce604..508fad5bb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,7 +99,8 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, BufferCache& buffer_cache, TextureCache& texture_cache) { update_descriptor_queue.Acquire(); - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -44,6 +44,8 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { - return info ? 
info->constant_buffer_mask : 0; - }); - + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; + } + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + } auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -130,7 +130,8 @@ private: std::array spv_modules; std::array stage_infos; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; From 79f2fe1a39120f498e915fa0c740b15dc0f09793 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:02:33 -0300 Subject: [PATCH 523/770] glasm: Use ARB_derivative_control conditionally --- .../backend/glasm/emit_glasm.cpp | 7 +++-- .../backend/glasm/emit_glasm_warp.cpp | 29 ++++++++++++++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ .../renderer_opengl/gl_shader_cache.cpp | 1 + 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 70ca6f621..fc01797b6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -265,9 +265,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, // TODO: Track the shared atomic ops header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" - "OPTION NV_gpu_program_fp64;" - "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;"; + "OPTION NV_gpu_program_fp64;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -295,6 +293,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 6e30790bb..8cec5ee7e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include 
"shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { @@ -111,19 +112,39 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f059e3b26..3109fb69c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -82,6 +82,7 @@ struct Profile { bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; + bool support_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 01da2bb57..3f7929f9e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -154,6 +154,7 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d67f5693c..1ffd24883 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -141,6 +145,7 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ea7c0ee8..bdffac4b2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -274,6 +274,7 @@ 
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = true, From 4f8b68fb0424ccd273107e45709acb6a5c35cecb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:10:15 -0300 Subject: [PATCH 524/770] shader: Avoid CPU side undefined behavior on I2F --- .../translate/impl/integer_floating_point_conversion.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index d6224d5cc..e0e157275 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -114,6 +114,8 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { IR::U1 is_least; if (src_bitsize == 64) { is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits::min())); } else { const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; is_least = v.ir.IEqual(src, least_value); From 8c954fcaee77c70583c5edc73c7c35eefcca39b0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:30 -0300 Subject: [PATCH 525/770] vk_pipeline_cache: Set support_derivative_control to true --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a2f166c8..a7f3619a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,6 +274,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), From 8f099af6a8ea691c5f8f1403848ca42077b34bd6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:59 -0300 Subject: [PATCH 526/770] nsight_aftermath_tracker: Fix SPIR-V module writes --- src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 209cb1e0a..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -99,7 +99,7 @@ void NsightAftermathTracker::SaveShader(std::span spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } From b02c78b276f449318bdc787a35d123df01c0bc6d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 
2021 18:50:01 -0300 Subject: [PATCH 527/770] vk_buffer_cache: Handle null texture buffers Fixes a crash on Age of Calamity cutscenes. --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2da3de6de..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -97,6 +97,10 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast } VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; })}; From ec9a78885e6a07b5259c9fbec19d9756443651b1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:34 -0300 Subject: [PATCH 528/770] shader: Add 2D and 3D variants to SUATOM and SURED Used by Claybook. --- .../maxwell/translate/impl/surface_atomic_operations.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 994bdc3eb..44144f154 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -76,6 +76,10 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::_1D: case Type::BUFFER_1D: return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); default: break; } From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: [PATCH 529/770] shader: Fix VertexA Shaders. 
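The merge strategy, sketched with the types used in program.cpp below (simplified; the committed code appends vertex B with a loop rather than insert()):

    // Drop vertex A's Return node so control falls through into vertex B,
    // then append B's syntax list and rebuild the block lists from the result.
    for (const auto& node : vertex_a.syntax_list) {
        if (node.type == IR::AbstractSyntaxNode::Type::Return) {
            continue;
        }
        result.syntax_list.push_back(node);
    }
    result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
                              vertex_b.syntax_list.end());
    result.blocks = GenerateBlocks(result.syntax_list);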
--- .../frontend/maxwell/program.cpp | 19 ++++++++++---- .../ir_opt/dual_vertex_pass.cpp | 24 +++++++++++------ src/shader_recompiler/ir_opt/passes.h | 1 - .../renderer_opengl/gl_shader_cache.cpp | 26 +++++++++++++++---- 4 files changed, 51 insertions(+), 19 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 900fc7ab1..8489f9a5f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -171,20 +171,29 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - std::swap(result.blocks, vertex_a.blocks); - result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index b0a9f5258..a926123f2 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,16 +13,24 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } -void VertexBTransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); -} - -void DualVertexJoinPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index e9cb8546a..5ebde49ea 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -25,7 +25,6 @@ void VerificationPass(const IR::Program& program); // Dual Vertex void 
VertexATransformPass(IR::Program& program); void VertexBTransformPass(IR::Program& program); -void DualVertexJoinPass(IR::Program& program); void JoinTextureInfo(Info& base, Info& source); void JoinStorageInfo(Info& base, Info& source); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bdffac4b2..0e4904733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -40,6 +40,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -446,6 +447,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( size_t env_index{}; u32 total_storage_buffers{}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -454,11 +457,22 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ++env_index; const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; @@ -472,7 +486,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } From f45f7b5c2a869123340591cec6db58c33a5fd3ab Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 3 Jun 2021 17:42:24 -0300 Subject: [PATCH 530/770] vk_swapchain: Handle outdated swapchains Fixes pixelated presentation on Intel devices.
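VK_ERROR_OUT_OF_DATE_KHR and VK_SUBOPTIMAL_KHR are now tracked separately: an outdated swapchain cannot present at all and must be recreated before the acquired image is used, while a suboptimal one still presents correctly and only asks for recreation when convenient. For reference, the standard acquire pattern this implements (plain Vulkan, simplified; RecreateSwapchain is a hypothetical helper):

    VkResult result = vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
                                            image_available, VK_NULL_HANDLE, &image_index);
    if (result == VK_ERROR_OUT_OF_DATE_KHR) {
        RecreateSwapchain(); // old swapchain is unusable; acquire again afterwards
    } else if (result == VK_SUBOPTIMAL_KHR) {
        // Image was acquired and can be presented; recreate at a convenient point.
    }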
--- .../renderer_vulkan/renderer_vulkan.cpp | 11 ++++---- .../renderer_vulkan/vk_swapchain.cpp | 26 ++++++++++++++----- src/video_core/renderer_vulkan/vk_swapchain.h | 14 +++++++--- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 54c41bcaf..6fda06a7e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -143,18 +143,17 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } - bool needs_recreate; + bool is_outdated; do { - needs_recreate = false; swapchain.AcquireNextImage(); - if (swapchain.NeedsRecreate()) { + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { recreate_swapchain(); - needs_recreate = true; } - } while (needs_recreate); + } while (is_outdated); if (has_been_recreated) { blit_screen.Recreate(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a71b0b01e..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,7 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - needs_recreate = false; + is_outdated = false; + is_suboptimal = false; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -85,11 +86,22 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), - *present_semaphores[frame_index], {}, &image_index); - needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; - + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); } @@ -115,7 +127,7 @@ void VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - needs_recreate = true; + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index df6da3d93..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -38,9 +38,14 @@ public: return current_srgb != is_srgb; } - /// Returns true when the image has to be recreated.
- bool NeedsRecreate() const { - return needs_recreate; + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. + bool IsSubOptimal() const { + return is_suboptimal; } VkExtent2D GetSize() const { @@ -95,7 +100,8 @@ private: VkExtent2D extent{}; bool current_srgb{}; - bool needs_recreate{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan From c736b9ffabc8a869d8ed131d365aff21b049f751 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:52:40 +0200 Subject: [PATCH 531/770] DMA: Restrict optimised path for BlockToLinear further. --- src/video_core/engines/maxwell_dma.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } From 2a0aeaa3d283f1c7f003c956ab3079f70246b008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 21:48:38 -0300 Subject: [PATCH 532/770] vk_rasterizer: Flush work on clear and dispatches --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9611b480a..e72f8426b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -189,6 +189,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -259,6 +260,8 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { + FlushWork(); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { return; From 48aad8dc05f027c21aa0e8a68d827006d9f7a196 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 22:10:02 -0300 Subject: [PATCH 533/770] vk_pipeline_cache: Add asynchronous shaders --- .../renderer_vulkan/vk_graphics_pipeline.h | 6 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 +++++++++++++++++-- .../renderer_vulkan/vk_pipeline_cache.h | 4 +++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8c81c28a8..3f8895927 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -87,7 +87,7 @@ public: configure_func(this, is_indexed); } - GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; } @@ -96,6 +96,10 @@ public: : nullptr; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + template <typename Spec> static auto MakeConfigureSpecFunc() { return [](GraphicsPipeline* pipeline, bool is_indexed) { diff --git
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a7f3619a5..741ed1a98 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -240,6 +240,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -303,7 +304,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; if (next) { current_pipeline = next; - return current_pipeline; + return BuiltPipeline(current_pipeline); } } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (is_new) { pipeline = CreateGraphicsPipeline(); } if (!pipeline) { return nullptr; } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } current_pipeline = pipeline.get(); - return current_pipeline; + return BuiltPipeline(current_pipeline); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -415,6 +416,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; +} + std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span<Shader::Environment* const> envs, bool build_in_parallel) try { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4116cc73f..869c63baf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -115,6 +115,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( @@ -140,6 +142,8 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; + bool use_asynchronous_shaders{}; + std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; From cffd4716c5ebf9b93505b5bfa96d9b407f349336 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:11:36 -0300 Subject: [PATCH 534/770] vk_pipeline_cache,shader_notify: Add shader notifications --- .../renderer_vulkan/vk_compute_pipeline.cpp | 12 ++++- .../renderer_vulkan/vk_compute_pipeline.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 30 ++++++----- .../renderer_vulkan/vk_graphics_pipeline.h | 22 ++++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++++-------- .../renderer_vulkan/vk_pipeline_cache.h | 9 +++- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_notify.cpp | 49 ++++++++---------- src/video_core/shader_notify.h | 28 ++++++----- src/yuzu/main.cpp | 12 ++--- 10 files changed, 126 insertions(+), 95 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ca59042ff..cc855a62e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -24,14 +25,18 @@ using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* thread_worker, const Shader::Info& info_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool] { + auto func{[this, &descriptor_pool, shader_notify] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -66,6 +71,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a6043866d..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -18,6 +18,10 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; @@ -27,7 +31,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* thread_worker, const Shader::Info& info, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, vk::ShaderModule spv_module); ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 627ca0158..5c916c869 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #if defined(_MSC_VER) && defined(NDEBUG) @@ -203,30 +204,30 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m } } // Anonymous namespace -GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, - BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device_, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key_, - std::array<vk::ShaderModule, NUM_STAGES> stages, - const std::array<const Shader::Info*, NUM_STAGES>& infos) +GraphicsPipeline::GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos) : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { const Shader::Info* const info{infos[stage]}; if (!info) { continue; } + stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - auto func{[this, &render_pass_cache, &descriptor_pool] { + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); @@ -242,6 +243,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3f8895927..40d1edabd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -20,6 +20,10 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { struct GraphicsPipelineCacheKey { @@ -64,16 +68,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array<vk::ShaderModule, NUM_STAGES> stages, - const std::array<const Shader::Info*, NUM_STAGES>& infos); + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 741ed1a98..e61d76490 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -235,11 +235,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, - TextureCache& texture_cache_) + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, device{device_},
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -307,19 +307,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { return BuiltPipeline(current_pipeline); } } - const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& pipeline{pair->second}; - if (is_new) { - pipeline = CreateGraphicsPipeline(); - } - if (!pipeline) { - return nullptr; - } - if (current_pipeline) { - current_pipeline->AddTransition(pipeline.get()); - } - current_pipeline = pipeline.get(); - return BuiltPipeline(current_pipeline); + return CurrentGraphicsPipelineSlowPath(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -416,6 +404,22 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); +} + GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { if (pipeline->IsBuilt()) { return pipeline; @@ -484,14 +488,16 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique<GraphicsPipeline>( - maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; + return std::make_unique<GraphicsPipeline>(maxwell3d, gpu_memory, scheduler, buffer_cache, + texture_cache, notify, device, descriptor_pool, + update_descriptor_queue, thread_worker, + render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -550,12 +556,14 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}", key.unique_hash)}; + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, - thread_worker, program.info, std::move(spv_module)); + thread_worker, notify, program.info, + std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 869c63baf..167a2ee2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,6 +38,10 @@ namespace Shader::IR { struct Program; } +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -104,7 +108,7 @@ public: VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, - TextureCache& texture_cache); + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -115,6 +119,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( @@ -140,6 +144,7 @@ private: RenderPassCache& render_pass_cache; BufferCache& buffer_cache; TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e72f8426b..d284b3653 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -140,7 +140,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, - texture_cache), + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,42 +2,35 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included.
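// The notifier below is lock-free: workers bump two atomic counters and the
// UI thread derives the in-flight count from their difference. A sketch of
// the intended call pattern (names as used in this patch):
//
//     notify.MarkShaderBuilding();                    // worker starts a build
//     notify.MarkShaderComplete();                    // worker finishes
//     const int building = notify.ShadersBuilding();  // polled by the UI
//
// Once num_building and num_complete have matched for TIME_TO_STOP_REPORTING,
// report_base is rebased onto the completed count so the reported value
// returns to zero.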
-#include <shared_mutex> +#include <atomic> +#include <chrono> +#include + #include "video_core/shader_notify.h" using namespace std::chrono_literals; namespace VideoCore { -namespace { -constexpr auto UPDATE_TICK = 32ms; -} -ShaderNotify::ShaderNotify() = default; -ShaderNotify::~ShaderNotify() = default; +const auto TIME_TO_STOP_REPORTING = 2s; -std::size_t ShaderNotify::GetShadersBuilding() { - const auto now = std::chrono::high_resolution_clock::now(); - const auto diff = now - last_update; - if (diff > UPDATE_TICK) { - std::shared_lock lock(mutex); - last_updated_count = accurate_count; +int ShaderNotify::ShadersBuilding() noexcept { + const int now_complete = num_complete.load(std::memory_order::relaxed); + const int now_building = num_building.load(std::memory_order::relaxed); + if (now_complete == now_building) { + const auto now = std::chrono::high_resolution_clock::now(); + if (completed && num_complete == num_when_completed) { + if (now - complete_time > TIME_TO_STOP_REPORTING) { + report_base = now_complete; + completed = false; + } + } else { + completed = true; + num_when_completed = num_complete; + complete_time = now; + } } - return last_updated_count; -} - -std::size_t ShaderNotify::GetShadersBuildingAccurate() { - std::shared_lock lock{mutex}; - return accurate_count; -} - -void ShaderNotify::MarkShaderComplete() { - std::unique_lock lock{mutex}; - accurate_count--; -} - -void ShaderNotify::MarkSharderBuilding() { - std::unique_lock lock{mutex}; - accurate_count++; + return now_building - report_base; } } // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -4,26 +4,30 @@ #pragma once +#include <atomic> #include <chrono> -#include <shared_mutex> -#include "common/common_types.h" +#include namespace VideoCore { class ShaderNotify { public: - ShaderNotify(); - ~ShaderNotify(); + [[nodiscard]] int ShadersBuilding() noexcept; - std::size_t GetShadersBuilding(); - std::size_t GetShadersBuildingAccurate(); + void MarkShaderComplete() noexcept { + ++num_complete; + } - void MarkShaderComplete(); - void MarkSharderBuilding(); + void MarkShaderBuilding() noexcept { + ++num_building; + } private: - std::size_t last_updated_count{}; - std::size_t accurate_count{}; - std::shared_mutex mutex; - std::chrono::high_resolution_clock::time_point last_update{}; + std::atomic_int num_building{}; + std::atomic_int num_complete{}; + int report_base{}; + + bool completed{}; + int num_when_completed{}; + std::chrono::high_resolution_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 03a909d17..7e0b1adc4 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2900,13 +2900,13 @@ void GMainWindow::UpdateStatusBar() { return; } - auto results = Core::System::GetInstance().GetAndResetPerfStats(); - auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); - const auto shaders_building = shader_notify.GetShadersBuilding(); + auto& system = Core::System::GetInstance(); + auto results = system.GetAndResetPerfStats(); + auto& shader_notify = system.GPU().ShaderNotify(); + const int shaders_building = shader_notify.ShadersBuilding(); - if (shaders_building != 0) { - shader_building_label->setText( - tr("Building: %n shader(s)", "", static_cast<int>(shaders_building))); + if (shaders_building > 0) { + shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building)); shader_building_label->setVisible(true); } else {
shader_building_label->setVisible(false); From 12fe7210d2b546bd9c5825b6517b80efc818a7fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:35:57 -0300 Subject: [PATCH 535/770] gl_shader_cache: Store workers in shader cache object --- .../renderer_opengl/gl_shader_cache.cpp | 121 ++++++++++-------- .../renderer_opengl/gl_shader_cache.h | 7 + 2 files changed, 74 insertions(+), 54 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0e4904733..9d6cef6e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -239,6 +239,15 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace +struct ShaderCache::Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr<Core::Frontend::GraphicsContext> gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, @@ -247,46 +256,49 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} { - profile = Shader::Profile{ - .supported_spirv = 0x00010000, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, + use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = - device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = device.HasNvViewportArray2(), - .support_typeless_image_loads = device.HasImageLoadFormatted(), - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - .support_derivative_control = device.HasDerivativeControl(), + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), - .warp_size_potentially_larger_than_guest = true, + .warp_size_potentially_larger_than_guest = true, - .lower_left_origin_mode = true, - .need_declared_frag_colors = true, + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, - }; + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); + } } ShaderCache::~ShaderCache() = default; @@ -307,29 +319,20 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, } shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); - struct Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr<Core::Frontend::GraphicsContext> gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; - }; - Common::StatefulThreadWorker<Context> workers( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); - + if (!workers) { + workers = CreateWorkers(); + } struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; @@ -347,7 +350,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { GraphicsPipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> env_ptrs; for (auto& env : envs) { @@ -373,7 +376,10 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, state.has_loaded = true; lock.unlock(); - workers.WaitForRequests(); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); + } } GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { @@ -570,4 +576,11 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( return nullptr; } +std::unique_ptr<Common::StatefulThreadWorker<ShaderCache::Context>> ShaderCache::CreateWorkers() + const { + return std::make_unique<Common::StatefulThreadWorker<Context>>( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index cf74d34e4..e0c5a06d8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -12,6 +12,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -44,6 +45,8 @@ struct ShaderPools { }; class ShaderCache : public VideoCommon::ShaderCache { + struct Context; + public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -74,6 +77,8 @@ private: const ComputePipelineKey& key, Shader::Environment& env); + std::unique_ptr<Common::StatefulThreadWorker<Context>> CreateWorkers() const; + Core::Frontend::EmuWindow& emu_window; const Device& device; TextureCache& texture_cache; @@ -82,6 +87,7 @@ private: StateTracker& state_tracker; GraphicsPipelineKey graphics_key{}; + const bool use_asynchronous_shaders; ShaderPools main_pools; std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; @@ -89,6 +95,7 @@ private: Shader::Profile profile; std::filesystem::path shader_cache_filename; + std::unique_ptr<Common::StatefulThreadWorker<Context>> workers; }; } // namespace OpenGL From b1ed64ac18fe7b5fc89abe06442527d8c440ddc7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 01:28:22 -0300 Subject: [PATCH 536/770] gl_shader_util: Move shader utility code to a separate file --- .../renderer_opengl/gl_resource_manager.cpp | 27 ---- .../renderer_opengl/gl_resource_manager.h | 14 --- .../renderer_opengl/gl_shader_cache.cpp | 81 +----------- .../renderer_opengl/gl_shader_util.cpp | 117 ++++++++++++------ .../renderer_opengl/gl_shader_util.h | 89 ++----------- .../renderer_opengl/renderer_opengl.cpp | 12 +- .../renderer_opengl/util_shaders.cpp | 11 +- 7 files changed, 106 insertions(+), 245 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include #include #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@
-128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template <typename... T> - void Create(bool separable_program, bool hint_retrievable, T... shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d6cef6e8..da0b36368 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" #include "video_core/shader_environment.h" @@ -53,77 +54,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -void AddShader(GLenum stage, GLuint program, std::span<const u32> code) { - OGLShader shader; - shader.handle = glCreateShader(stage); - - glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), - static_cast<GLsizei>(code.size_bytes())); - glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); - if (!Settings::values.renderer_debug) { - return; - } - GLint shader_status{}; - glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Failed to build shader"); - } - GLint log_length{}; - glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { - OGLAssemblyProgram program; - glGenProgramsARB(1, &program.handle); - glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast<GLsizei>(code.size()), code.data()); - if (Settings::values.renderer_debug) { - const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - if (std::strstr(err, "error")) { - LOG_CRITICAL(Render_OpenGL, "\n{}", err); - LOG_INFO(Render_OpenGL, "\n{}", code); - } else { - LOG_WARNING(Render_OpenGL, "\n{}", err); - } - } - } - return program; -} GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -492,9 +422,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; - ++index) { + const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -510,7 +439,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; - AddShader(Stage(stage_index), source_program.handle, code); + AttachShader(Stage(stage_index), source_program.handle, code); } } if (!device.UseAssemblyShaders()) { @@ -565,7 +494,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& } else { const std::vector<u32> code{EmitSPIRV(profile, program)}; source_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..99cb81819 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,100 @@ #include #include #include + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { +namespace OpenGL { -namespace { - -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; +static void LogShader(GLuint shader) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; } -} // Anonymous namespace +void AttachShader(GLenum stage, GLuint program, std::string_view code) { + OGLShader shader; + shader.handle = glCreateShader(stage); -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); + const GLint length = static_cast<GLint>(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} - const GLchar* source_string = source.data(); - const GLint source_length = static_cast<GLint>(source.size()); +void AttachShader(GLenum stage, GLuint program, std::span<const u32> code) { OGLShader shader; + shader.handle = glCreateShader(stage); - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast<GLsizei>(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast<GLsizei>(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..ff5aa024f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,25 @@ #pragma once +#include <span> #include <string> +#include <string_view> #include <vector> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { +namespace OpenGL { -/** - * Utility function to log the source code of a list of shaders. - * @param shaders The OpenGL shaders whose source we will print. - */ -template <typename... T> -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; +void AttachShader(GLenum stage, GLuint program, std::string_view code); - for (const auto& shader : shader_list) { - if (shader == 0) - continue; +void AttachShader(GLenum stage, GLuint program, std::span<const u32> code); - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); +void LinkProgram(GLuint program); - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template <typename... T> -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } - - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } - - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - - ((shaders == 0 ?
(void)0 : glDetachShader(program_id, shaders)), ...); - - return program_id; -} - -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a4805f3da..b8777643b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,6 +24,7 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -230,13 +231,10 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); + present_program.handle = glCreateProgram(); + AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); + AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); + LinkProgram(present_program.handle); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 51e72b705..8aa0683c8 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -17,6 +17,7 @@ #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -40,13 +41,12 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - OGLProgram program; - program.Create(true, false, shader.handle); + OGLShader shader; + program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, program.handle, source); + LinkProgram(program.handle); return program; } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast<size_t>(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) From 7eaa74ad235b669608debaf3583af94bd675b6c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 7 Jun 2021 20:43:00 -0300 Subject: [PATCH 537/770] gl_texture_cache: Create image storage views Fixes SULD.D tests.
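Formatted surface loads (SULD.D) reinterpret the texel data, which OpenGL expresses through texture views rather than the plain texture handle. Roughly, per used shader image format (a sketch; glTextureView requires the source texture to have immutable storage and the view name to be generated but not yet bound):

    GLuint view;
    glGenTextures(1, &view);
    // Reinterpret the image's texels, e.g. as R32UI, for image loads/stores.
    glTextureView(view, GL_TEXTURE_2D, source_texture, GL_R32UI,
                  base_level, num_levels, base_layer, num_layers);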
--- .../renderer_opengl/gl_compute_pipeline.cpp | 5 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 5 +- .../renderer_opengl/gl_texture_cache.cpp | 132 +++++++++++++----- .../renderer_opengl/gl_texture_cache.h | 22 ++- 4 files changed, 126 insertions(+), 38 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 61b6fe4b7..a40106c87 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -178,7 +178,10 @@ void ComputePipeline::Configure() { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a5d65fdca..a2ea35d5a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -332,7 +332,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } }}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7053be161..c373c9cb4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -328,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -837,21 +859,28 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } else { internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array handles; - stored_views.reserve(2); - + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; + } switch (info.type) { case 
ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); + SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -861,26 +890,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); } else { - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, - info.range); + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: UNIMPLEMENTED(); @@ -928,22 +954,62 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& type_views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); + } + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); + if (!is_render_target) { + std::array casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); } - if (device.HasDebuggingToolAttached()) { + if (set_object_label) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + glObjectLabel(GL_TEXTURE, view.handle, static_cast(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 2e3e02b79..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -185,6 +185,9 @@ public: const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { return views[static_cast(handle_type)]; } @@ -206,16 +209,29 @@ public: } private: - void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array signeds{}; + std::array unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); std::array views{}; std::vector stored_views; + std::unique_ptr storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; From 
15bdd27cac4a0b1e6cd168272dc337cd685ef144 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 23:33:48 -0400 Subject: [PATCH 538/770] shader_environment: Add shader_local_memory_crs_size to local memory size Fixes DOOM 2016 missing local memory --- src/shader_recompiler/program_header.h | 4 ++-- src/video_core/shader_environment.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 15f43f2d8..6933750aa 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -230,8 +230,8 @@ struct ProgramHeader { }; [[nodiscard]] u64 LocalMemorySize() const noexcept { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); + return static_cast<u64>(common1.shader_local_memory_low_size) | + (static_cast<u64>(common2.shader_local_memory_high_size) << 24); } }; static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index c93174519..a7a57a36f 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size; + return local_memory_size + sph.common3.shader_local_memory_crs_size; } u32 GenericEnvironment::SharedMemorySize() const { From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:24:12 -0300 Subject: [PATCH 539/770] buffer_cache: Fix copy based uniform bindings tracking --- src/video_core/buffer_cache/buffer_cache.h | 19 +++++++++++++++---- .../renderer_opengl/gl_buffer_cache.h | 12 +++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ec64f2293..47cb0a47d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -680,6 +680,9 @@ void BufferCache
<P>
::SetUniformBuffersState(const std::array& m const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } dirty_uniform_buffers.fill(~u32{0}); } } @@ -1020,6 +1023,7 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1027,8 +1031,9 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -1046,9 +1051,15 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // This exists to avoid instances where the fast buffer is bound and a GPU write happens return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bc16abafb..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -92,16 +92,14 @@ public: VideoCore::Surface::PixelFormat format); void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -134,6 +132,10 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + void SetBaseUniformBindings(const std::array& bindings) { graphics_base_uniform_bindings = bindings; } From 5befc0bf872058315c4f81bf58dcd173db2589fd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:27:00 -0300 Subject: [PATCH 540/770] shader_environment: Fix local memory size calculations --- src/video_core/shader_environment.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index a7a57a36f..6243cd176 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size + sph.common3.shader_local_memory_crs_size; + return local_memory_size; } u32 GenericEnvironment::SharedMemorySize() const { @@ -233,7 +233,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, } const u64 local_size{sph.LocalMemorySize()}; ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); + local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.tex_cb_index; } @@ -261,7 +261,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com &kepler_compute_} { const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; + local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; 
texture_bound = kepler_compute->regs.tex_cb_index; shared_memory_size = qmd.shared_alloc; workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; From cd8427367ed372e355fa76a78d41b3bc64f997ca Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 01:55:27 -0400 Subject: [PATCH 541/770] gl_buffer_cache: Use unorm internal formats for snorm texture buffer views Fixes black textures in UE4 games --- .../renderer_opengl/gl_buffer_cache.cpp | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 334ed470f..0703614de 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -25,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -76,7 +95,11 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { OGLTexture texture; texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; - glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); views.push_back({ .offset = offset, .size = size, From 8554a644df7ad909e418f3e96016e95abc55712f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 00:18:24 -0300 Subject: [PATCH 542/770] spirv/convert: Catch more broken signed operations on Nvidia OpenGL BitCast U32 to S32 before converting to float on drivers with broken signed operations. 
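In SPIR-V terms, the workaround below inserts an OpBitcast so that OpConvertSToF receives an operand whose declared type is signed; the bit pattern is unchanged, only the type the driver sees differs. A minimal sketch of the pattern (the helper name and the float_type parameter are illustrative generalizations of the EmitConvertF32S32/EmitConvertF64S32 hunks below):

    // On profiles flagged with has_broken_signed_operations, reinterpret the
    // u32-typed value as s32 before the signed integer-to-float conversion.
    Id ConvertSignedToFloat(EmitContext& ctx, Id float_type, Id value) {
        if (ctx.profile.has_broken_signed_operations) {
            value = ctx.OpBitcast(ctx.S32[1], value); // same bits, signed type
        }
        return ctx.OpConvertSToF(float_type, value);
    }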
--- src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index fd74e475f..2c4250a0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -205,6 +205,9 @@ Id EmitConvertF32S16(EmitContext& ctx, Id value) { } Id EmitConvertF32S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } return ctx.OpConvertSToF(ctx.F32[1], value); } @@ -237,6 +240,9 @@ Id EmitConvertF64S16(EmitContext& ctx, Id value) { } Id EmitConvertF64S32(EmitContext& ctx, Id value) { + if (ctx.profile.has_broken_signed_operations) { + value = ctx.OpBitcast(ctx.S32[1], value); + } return ctx.OpConvertSToF(ctx.F64[1], value); } From d52bacf6f035ddbc4b2333953709cbd3993e4817 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 01:11:59 -0400 Subject: [PATCH 543/770] spirv/convert: Catch more signed operations oversights The sign bit on integers of size < 32 was not properly preserved in casts --- .../backend/spirv/emit_spirv_convert.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 2c4250a0c..fd42b7a16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -17,14 +17,14 @@ Id ExtractU16(EmitContext& ctx, Id value) { Id ExtractS16(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.S16, value); + return ctx.OpSConvert(ctx.S16, value); } else { return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); } } Id ExtractU8(EmitContext& ctx, Id value) { - if (ctx.profile.support_int16) { + if (ctx.profile.support_int8) { return ctx.OpUConvert(ctx.U8, value); } else { return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); @@ -42,7 +42,7 @@ Id ExtractS8(EmitContext& ctx, Id value) { Id EmitConvertS16F16(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } @@ -50,7 +50,7 @@ Id EmitConvertS16F16(EmitContext& ctx, Id value) { Id EmitConvertS16F32(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } @@ -58,7 +58,7 @@ Id EmitConvertS16F32(EmitContext& ctx, Id value) { Id EmitConvertS16F64(EmitContext& ctx, Id value) { if (ctx.profile.support_int16) { - return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); + return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); } else { return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); } From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: [PATCH 544/770] vulkan: Use VK_EXT_provoking_vertex when available --- 
src/video_core/engines/maxwell_3d.h | 7 ++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 +-- .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 ++++++- .../vulkan_common/vulkan_device.cpp | 28 +++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cbf94412b..04d5790f6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1151,7 +1151,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1672,6 +1676,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 
1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2b715baba..618535aae 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -718,6 +731,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -748,6 +762,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, 
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -799,6 +814,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9bc1fb947..37f589612 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -244,6 +244,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return ext_shader_atomic_int64; @@ -346,6 +351,7 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached From 3025b2f605df74a129f0f47aadd4247055ecd6bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:53:38 -0300 Subject: [PATCH 545/770] vk_rasterizer: Implement first index --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d284b3653..e339e9739 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,7 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; @@ -103,6 +104,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? 
regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -173,8 +175,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); From cb78a1b494be2f6bc0927ed5b7a878236a3dc1c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 01:46:30 -0300 Subject: [PATCH 546/770] shader: Reorder shader cache directories --- .../renderer_opengl/gl_shader_cache.cpp | 13 +++++-------- .../renderer_vulkan/vk_pipeline_cache.cpp | 17 +++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index da0b36368..9391a4cd9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -238,16 +238,13 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "new_opengl"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); return; } - shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + shader_cache_filename = base_dir / "opengl.bin"; if (!workers) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e61d76490..6df4088a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -337,22 +337,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "vulkan"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || 
!Common::FS::CreateDir(base_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); return; } - pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + pipeline_cache_filename = base_dir / "vulkan.bin"; struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { From ea038d66538975319858f792052af1d0fa997fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 05:07:52 -0300 Subject: [PATCH 547/770] vulkan: Add VK_EXT_vertex_input_dynamic_state support Reduces the number of total pipelines generated on Vulkan. Tested on Super Smash Bros. Ultimate. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 67 +++++++---- .../renderer_vulkan/fixed_pipeline_state.h | 73 +++++++----- .../renderer_vulkan/vk_graphics_pipeline.cpp | 109 +++++++++++------- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 ++++- .../renderer_vulkan/vk_rasterizer.cpp | 56 +++++++++ .../renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_state_tracker.cpp | 52 ++++++--- .../renderer_vulkan/vk_state_tracker.h | 8 +- src/video_core/vulkan_common/vulkan_device.h | 6 + .../vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 8 ++ 11 files changed, 293 insertions(+), 118 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f121fbf0e..16cef8711 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,7 +50,7 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -60,7 +60,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; - no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); @@ -73,11 +74,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? 
regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -93,24 +94,44 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast(attrs[i].type.Value())]; + attribute_types |= static_cast(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 
0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -126,10 +147,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (no_extended_dynamic_state != 0) { + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } - if (xfb_enabled != 0) { + if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 60adae316..04f34eb97 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -168,44 +168,51 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<1, 1, u32> xfb_enabled; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; - BitField<4, 1, u32> depth_enabled; - BitField<5, 5, u32> depth_format; - BitField<10, 1, u32> y_negate; - BitField<11, 1, u32> provoking_vertex_last; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + BitField<12, 1, u32> provoking_vertex_last; }; std::array color_formats; u32 alpha_test_ref; u32 point_size; - std::array binding_divisors; - std::array attributes; std::array attachments; std::array viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array attributes; + std::array binding_divisors; + DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -216,16 +223,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - if (xfb_enabled != 0) { + if (xfb_enabled) { // When 
transform feedback is enabled, use the whole struct return sizeof(*this); - } else if (no_extended_dynamic_state != 0) { - // Dynamic state is enabled, we can enable more - return offsetof(FixedPipelineState, xfb_state); - } else { - // No XFB, extended dynamic state enabled + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state return offsetof(FixedPipelineState, dynamic_state); } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 06a80c2ba..ccef71f4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,39 +472,65 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (!device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], + static_vector vertex_attributes; + if (key.state.dynamic_vertex_input) { + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!input_attributes[index].used || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, + .offset = 0, }); } - } - static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { - continue; + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = key.state.binding_divisors[index], + }); + } + } + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !input_attributes[index].used) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -545,27 +571,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; + std::array swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? 
&swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, @@ -660,13 +684,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -678,6 +702,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6df4088a7..db7da5555 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,20 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Disabled; +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -123,13 +137,19 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { info.fixed_state_point_size = point_size; } - if (key.state.xfb_enabled != 0) { + if (key.state.xfb_enabled) { info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } break; case Shader::Stage::TessellationEval: // We have to flip tessellation clockwise for some reason... 
@@ -298,7 +318,8 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e339e9739..855c17769 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -551,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -780,4 +783,57 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexAttribute0 + index]) { + continue; + } + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; + + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = attribute.IsConstant() + ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 + : MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? 
input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1302bed02..c954fa7f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..0ebe0473f 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, + DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, + DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, + StencilOp, StencilTestEnable, VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -134,19 +141,6 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; - } -} - -void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); -} - void SetupDirtyViewportSwizzles(Tables& tables) { static constexpr size_t swizzle_offset = 6; for (size_t index = 0; index < Regs::NumViewports; ++index) { @@ -154,11 +148,31 @@ void SetupDirtyViewportSwizzles(Tables& tables) { ViewportSwizzles; } } + +void SetupDirtyVertexAttributes(Tables& tables) { + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); +} + +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = 
static_cast<u8>(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; + } +} } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables);
@@ -175,9 +189,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..1976b7e9b 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,6 +19,12 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias,
@@ -36,8 +42,6 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, Last
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 37f589612..4fda472b0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -239,6 +239,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export;
@@ -349,6 +354,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
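
The two-level registration in SetupDirtyVertexBindings above is the heart of this patch: the instanced-array and divisor registers are entered in both dirty tables, so a single guest register write raises the coarse VertexInput flag (level 0) and a fine per-binding flag (level 1) at the same time. A minimal, self-contained sketch of that dispatch, with simplified stand-in names (NoFlag, OnRegisterWrite, the table sizes and the register offset are all illustrative, not yuzu's types):

    // Two-level dirty tracking sketch: one register write can raise one
    // flag per table level. All names here are hypothetical stand-ins.
    #include <array>
    #include <bitset>
    #include <cstddef>
    #include <cstdio>

    constexpr int NoFlag = 0;
    constexpr int VertexInput = 1;    // coarse flag: rebuild all vertex input state
    constexpr int VertexBinding0 = 2; // first of the fine per-binding flags

    std::bitset<64> dirty_flags;
    // One flag table per tracking level, indexed by register offset.
    std::array<std::array<int, 256>, 2> tables{};

    void OnRegisterWrite(std::size_t offset) {
        // A single register write may raise one flag per table level.
        for (const auto& table : tables) {
            if (const int flag = table[offset]; flag != NoFlag) {
                dirty_flags[flag] = true;
            }
        }
    }

    int main() {
        const std::size_t divisor_offset = 42;          // hypothetical register offset
        tables[0][divisor_offset] = VertexInput;        // level 0: coarse invalidation
        tables[1][divisor_offset] = VertexBinding0 + 7; // level 1: only binding 7
        OnRegisterWrite(divisor_offset);
        std::printf("VertexInput=%d binding7=%d\n", dirty_flags.test(VertexInput),
                    dirty_flags.test(VertexBinding0 + 7));
    }

The split lets UpdateVertexInput skip bindings whose registers were never touched, while the pipeline cache still observes a single coarse VertexInput invalidation.
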
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 33fb74bfb..7e13ae8af 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -123,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..6e5be1186 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -238,6 +238,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{};
@@ -1203,6 +1204,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span<VkVertexInputBindingDescription2EXT> bindings, + vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept {
From 3c125d41348b08a467333cf0e40bed7ce26cd7cc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 16:23:04 -0400 Subject: [PATCH 548/770] tmml: Remove index component from coords vec

The LOD query functions exposed by the rendering APIs do not make use of the texture array layer index.
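
To make the new register-to-coordinate mapping concrete, here is a self-contained sketch of the packing MakeCoords now performs, with plain floats standing in for the IR register reads (the function and register layout below are an illustration of the diff, not shader_recompiler code):

    // Sketch of TMML coordinate assembly with the array index dropped.
    // `regs` is a hypothetical stand-in for consecutive GPU registers.
    #include <vector>

    enum class TextureType { _1D, ARRAY_1D, _2D, ARRAY_2D };

    std::vector<float> MakeCoords(const float* regs, TextureType type) {
        switch (type) {
        case TextureType::_1D:
            return {regs[0]};
        case TextureType::ARRAY_1D:
            return {regs[1]};          // regs[0] is the layer index, skipped
        case TextureType::_2D:
            return {regs[0], regs[1]};
        case TextureType::ARRAY_2D:
            return {regs[1], regs[2]}; // regs[0] is the layer index, skipped
        }
        return {};
    }

    int main() {
        const float regs[3]{7.0f, 0.25f, 0.75f}; // layer 7, then s, t
        return MakeCoords(regs, TextureType::ARRAY_2D).size() == 2 ? 0 : 1;
    }

GLSL's textureQueryLod and SPIR-V's OpImageQueryLod only take the sampling coordinates, so a layer component would have nowhere to go in the generated code.
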
--- .../maxwell/translate/impl/texture_mipmap_level.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index abf87a0df..667c69a0d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -46,16 +46,15 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + return v.F(reg + 1); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -63,7 +62,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); } throw NotImplementedException("Invalid texture type {}", type); } From 544579926069050d4a9b37c9584705919aab24e8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 16:57:31 -0400 Subject: [PATCH 549/770] main: Fix Open Transferable Shader Cache context item Opens the new shader cache directory location for the specified title, if it exists. --- src/yuzu/main.cpp | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 7e0b1adc4..562dfa620 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1654,35 +1654,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable"; - const auto transferable_shader_cache_file_path = - transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id); - - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)}; + if (!Common::FS::Exists(shader_cache_folder_path)) { QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), tr("A shader cache for this title does not exist.")); return; } - - const auto qt_shader_cache_folder_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path)); - const auto qt_shader_cache_file_path = - QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path)); - - // Windows supports opening a folder with selecting a specified file in explorer. 
On every other - // OS we just open the transferable shader cache folder without preselecting the transferable - // shader cache file for the selected game. -#if defined(Q_OS_WIN) - const QString explorer = QStringLiteral("explorer"); - QStringList param; - if (!QFileInfo(qt_shader_cache_file_path).isDir()) { - param << QStringLiteral("/select,"); - } - param << QDir::toNativeSeparators(qt_shader_cache_file_path); - QProcess::startDetached(explorer, param); -#else - QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path)); -#endif + const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)}; + const auto qt_shader_cache_path = QString::fromStdString(shader_path_string); + QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path)); } static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { From 41cca8b8ad6f7ea33e74210aee4e3867ffa0622e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:24:49 -0300 Subject: [PATCH 550/770] vk_pipeline_cache: Skip cached pipelines with different dynamic state --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index db7da5555..b17f34cdd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -391,10 +391,16 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; From ba3bdf1d4156fa6fd257305406a3b88f0c288006 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:25:40 -0300 Subject: [PATCH 551/770] vulkan_device: Enable VK_EXT_vertex_input_dynamic_state --- .../vulkan_common/vulkan_device.cpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 618535aae..8eb37a77a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -425,6 +425,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); } + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -732,6 +744,7 @@ std::vector Device::LoadExtensions(bool 
requires_surface) { bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) {
@@ -763,6 +776,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
@@ -827,6 +842,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { ext_provoking_vertex = true; } } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
From 487057b8d2bd79892423ad3a1b5a96d0407b307a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:28:06 -0300 Subject: [PATCH 552/770] shader: Comment why the array component is not read in TMML --- .../frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 667c69a0d..aea3c0e62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -46,6 +46,8 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed in high-level shading languages. + // We are dropping this information. switch (type) { case TextureType::_1D: return v.F(reg);
From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: [PATCH 553/770] shader: Add shader loop safety check settings

Also add a setting to enable Nsight Aftermath.
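
In plain C++ terms, the guard injected around each shader loop behaves like the countdown below: the counter starts at the pass's 0x2000 threshold, is decremented on every back edge, and forces an exit even if the real loop condition never becomes false (a sketch with illustrative variable names, not the IR the pass emits):

    #include <cstdio>

    int main() {
        unsigned loop_safety = 0x2000; // injected before the loop header
        bool real_condition = true;    // imagine a shader bug: never flips
        do {
            // ... original loop body ...
        } while (real_condition && --loop_safety != 0); // forced exit
        std::printf("exited with %u safety iterations left\n", loop_safety);
    }

Because the counter is written through a regular variable, the SSA rewrite pass below can track it like any other definition, and enabling the new disable_shader_loop_safety_checks setting skips emitting the counter entirely.
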
--- src/common/settings.h | 3 + .../backend/glasm/emit_glasm_instructions.h | 2 + .../glasm/emit_glasm_not_implemented.cpp | 8 +++ .../spirv/emit_spirv_context_get_set.cpp | 24 ++++--- .../backend/spirv/emit_spirv_instructions.h | 2 + .../frontend/ir/ir_emitter.cpp | 14 +++- .../frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../maxwell/structured_control_flow.cpp | 42 ++++++++++-- .../ir_opt/ssa_rewrite_pass.cpp | 66 +++++++++++++------ .../vulkan_common/vulkan_device.cpp | 4 +- src/yuzu/configuration/config.cpp | 4 ++ src/yuzu/configuration/configure_debug.cpp | 8 +++ src/yuzu/configuration/configure_debug.ui | 26 ++++++++ src/yuzu_cmd/config.cpp | 2 + src/yuzu_cmd/default_ini.h | 8 +++ 16 files changed, 183 insertions(+), 35 deletions(-) diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..ac0590690 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -308,6 +308,9 @@ struct Values { // Renderer Setting renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting renderer_debug{false, "debug"}; + BasicSetting enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting vulkan_device{0, "vulkan_device"}; Setting resolution_factor{1, "resolution_factor"}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c9f4826ce..fef9ff9be 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -42,6 +42,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 95bcbd750..60735fe31 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -153,6 +153,14 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } +void EmitSetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 442a958a5..42fff74e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,35 +163,43 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde } // Anonymous namespace void EmitGetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetRegister(EmitContext&) { - throw 
NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); +} + +void EmitSetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1181e7b4f..e3e5b03fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,6 +43,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e9fd41237..6c37af5e7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,12 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + U1 IREmitter::GetGotoVariable(u32 id) { return Inst(Opcode::GetGotoVariable, id); } @@ -141,8 +147,12 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -void IREmitter::SetPred(IR::Pred pred, const U1& value) { - Inst(Opcode::SetPred, pred, value); +U32 IREmitter::GetLoopSafetyVariable(u32 id) { + return Inst(Opcode::GetLoopSafetyVariable, id); +} + +void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { + Inst(Opcode::SetLoopSafetyVariable, id, counter); } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..7caab1f61 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,6 +55,9 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void 
SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); + void SetLoopSafetyVariable(u32 id, const U32& counter); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..e87aeddd5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,6 +32,8 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetLoopSafetyVariable, U32, U32, ) +OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const 
Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index e54499ba5..a4ba393ef 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,73 +48,91 @@ struct GotoVariable : FlagTag { u32 index; }; +struct LoopSafetyVariable { + LoopSafetyVariable() = default; + explicit LoopSafetyVariable(u32 index_) : index{index_} {} + + auto operator<=>(const LoopSafetyVariable&) const noexcept = default; + + u32 index; +}; + struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = std::variant; -using ValueMap = boost::container::flat_map>; +using Variant = + std::variant; +using ValueMap = boost::container::flat_map; struct DefTable { - const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { return block->SsaRegValue(variable); } - void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { block->SetSsaRegValue(variable, value); } - const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Pred variable) { return preds[IR::PredIndex(variable)][block]; } - void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + const IR::Value& Def(IR::Block* block, GotoVariable variable) { return goto_vars[variable.index][block]; } - void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { + return loop_safety_vars[variable.index][block]; + } + void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { + loop_safety_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } - void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { indirect_branch_var.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { return zero_flag[block]; } - void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { zero_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, SignFlagTag) { return sign_flag[block]; } - void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { sign_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + const 
IR::Value& Def(IR::Block* block, CarryFlagTag) { return carry_flag[block]; } - void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { carry_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { return overflow_flag[block]; } - void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { overflow_flag.insert_or_assign(block, value); } std::array preds; boost::container::flat_map goto_vars; + boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -134,6 +152,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -315,6 +337,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetLoopSafetyVariable: + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -343,6 +368,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetLoopSafetyVariable: + inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8eb37a77a..bf063c047 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -467,7 +467,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { @@ -781,7 +781,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..dc69574a9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ 
-1353,6 +1355,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -125,6 +125,16 @@ + + + + When checked, it enables Nsight Aftermath crash dumps + + + Enable Nsight Aftermath + + + @@ -138,6 +148,16 @@ + + + + When checked, it executes shaders without loop logic changes + + + Disable Loop safety checks + + + @@ -252,11 +272,17 @@ log_filter_edit toggle_console + extended_logging open_log_button homebrew_args_edit enable_graphics_debugging + enable_nsight_aftermath + disable_macro_jit + disable_loop_safety_checks reporting_services quest_flag + use_debug_asserts + use_auto_stub diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..763df6dd6 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..a6ca7b6cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: 
Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = From 61cd7dd30128633b656ce3264da74bef1ba00bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 02:27:49 -0300 Subject: [PATCH 554/770] shader: Add logging --- src/common/logging/filter.cpp | 4 ++++ src/common/logging/types.h | 4 ++++ src/shader_recompiler/backend/glasm/emit_glasm.cpp | 2 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 ++++-- src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 4 ++-- .../backend/glasm/emit_glasm_not_implemented.cpp | 2 +- src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | 8 ++++---- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 8 ++++---- src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | 6 +++--- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ++-- .../backend/spirv/emit_spirv_special.cpp | 4 ++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 +- .../translate/impl/internal_stage_buffer_entry_read.cpp | 2 +- .../maxwell/translate/impl/move_special_register.cpp | 8 ++++---- .../frontend/maxwell/translate/impl/vote.cpp | 2 +- 15 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4f2cc29e1..f055f0e11 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Render, Software) \ SUB(Render, OpenGL) \ SUB(Render, Vulkan) \ + CLS(Shader) \ + SUB(Shader, SPIRV) \ + SUB(Shader, GLASM) \ + SUB(Shader, GLSL) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 88b0e9c01..7ad0334fc 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -114,6 +114,10 @@ enum class Class : u8 { Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend Render_Vulkan, ///< Vulkan backend + Shader, ///< Shader recompiler + Shader_SPIRV, ///< Shader SPIR-V code generation + Shader_GLASM, ///< Shader GLASM code generation + Shader_GLSL, ///< Shader GLSL code generation Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index fc01797b6..832b4fd40 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -253,7 +253,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } if (!ctx.reg_alloc.IsEmpty()) { - // LOG_WARNING ...; + LOG_WARNING(Shader_GLASM, "Register leak after generating code"); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index c1df7a342..20b925877 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -145,14 +145,16 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) 
{ ctx.Add("MOV.F result.layer.x,{};", value); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, + "Layer stored outside of geometry shader not supported by device"); } break; case IR::Attribute::ViewportIndex: if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { ctx.Add("MOV.F result.viewport.x,{};", value); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, + "Viewport stored outside of geometry shader not supported by device"); } break; case IR::Attribute::PointSize: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 81d5fe72c..09e3a9b82 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -139,12 +139,12 @@ void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR:: std::string GradOffset(const IR::Value& offset) { if (offset.IsImmediate()) { - // LOG_WARNING immediate + LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); return ""; } IR::Inst* const vector{offset.InstRecursive()}; if (!vector->AreAllArgsImmediates()) { - // LOG_WARNING elements not immediate + LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); return ""; } switch (vector->NumArgs()) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 60735fe31..a487a0744 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -115,7 +115,7 @@ void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { if (!stream.IsImmediate()) { - // LOG_WARNING not immediate + LOG_WARNING(Shader_GLASM, "Stream is not immediate"); } ctx.reg_alloc.Consume(stream); ctx.Add("ENDPRIM;"); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 8cec5ee7e..544d475b4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -115,7 +115,7 @@ void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDX.FINE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); ctx.Add("DDX {}.x,{};", inst, p); } } @@ -124,7 +124,7 @@ void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDY.FINE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); ctx.Add("DDY {}.x,{};", inst, p); } } @@ -133,7 +133,7 @@ void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDX.COARSE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); ctx.Add("DDX {}.x,{};", inst, p); } } @@ -142,7 +142,7 @@ void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { if (ctx.profile.support_derivative_control) { ctx.Add("DDY.COARSE {}.x,{};", inst, p); } else { - // LOG_WARNING + LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); ctx.Add("DDY {}.x,{};", inst, p); } } diff --git 
a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index cba420cda..14a99750d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -294,7 +294,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + LOG_ERROR(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -307,7 +307,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); } else { - // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } } if (!profile.support_separate_denorm_behavior) { @@ -315,7 +315,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + LOG_ERROR(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -328,7 +328,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); } else { - // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); } } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 053800eb7..9af8bb9e1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -73,7 +73,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; @@ -140,7 +140,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer_1{SharedPointer(ctx, offset, 0)}; const Id pointer_2{SharedPointer(ctx, offset, 1)}; const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; @@ -266,7 +266,7 @@ Id 
EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, binding, offset, sizeof(u32[2]))}; const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index cf842e1e0..647804814 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -39,7 +39,7 @@ public: } const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { - // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); + LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); return; } const IR::Opcode opcode{values[0]->GetOpcode()}; @@ -442,7 +442,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { const auto info{inst->Flags()}; if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { - // LOG_WARNING(..., "Typeless image read not supported by host"); + LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); return ctx.ConstantNull(ctx.U32[4]); } return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 072a3b1bd..9e7eb3cb1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -131,7 +131,7 @@ void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { if (stream.IsImmediate()) { ctx.OpEmitStreamVertex(ctx.Def(stream)); } else { - // LOG_WARNING(..., "EmitVertex's stream is not constant"); + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); ctx.OpEmitStreamVertex(ctx.u32_zero_value); } // Restore fixed pipeline point size after emitting the vertex @@ -142,7 +142,7 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { if (stream.IsImmediate()) { ctx.OpEndStreamPrimitive(ctx.Def(stream)); } else { - // LOG_WARNING(..., "EndPrimitive's stream is not constant"); + LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); ctx.OpEndStreamPrimitive(ctx.u32_zero_value); } } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6c37af5e7..d2ac2acac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -270,7 +270,7 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); case FlowTest::FCSM_TR: - // LOG_WARNING(ShaderDecompiler, "FCSM_TR CC State (Stubbed)"); + LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); return ir.Imm1(false); case FlowTest::CSM_TA: case FlowTest::CSM_TR: diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index edd6220a8..9b85f8059 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -46,7 +46,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { if (isberd.shift != Shift::Default) { throw NotImplementedException("Shift {}", isberd.shift.Value()); } - // LOG_WARNING(..., "ISBERD is stubbed"); + LOG_WARNING(Shader, "(STUBBED) called"); X(isberd.dest_reg, X(isberd.src_reg)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index fe3cdfa96..20cb2674e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -118,7 +118,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: - // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID: { const IR::Value tid{ir.LocalInvocationId()}; @@ -140,10 +140,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); @@ -160,7 +160,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); case SpecialRegister::SR_AFFINITY: - // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. 
default: throw NotImplementedException("S2R special register {}", special_register); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 0793611ff..7ce370f09 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -48,7 +48,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); + LOG_WARNING(Shader, "(STUBBED) called"); } } // namespace Shader::Maxwell From 94e751f415d70fe255eada77c4385ec966c07a95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 04:32:45 -0300 Subject: [PATCH 555/770] buffer_cache: Invalidate fast buffers on compute --- src/video_core/buffer_cache/buffer_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 47cb0a47d..d004199ba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1132,6 +1132,7 @@ void BufferCache
<P>
::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { From dbf7cb9f90a99faa6d6ab07558d65dd113728ff1 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Mon, 14 Jun 2021 22:02:42 -0300 Subject: [PATCH 556/770] vk_graphics_pipeline: Fix path with no VK_EXT_extended_dynamic_state --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ccef71f4c..e02b1b7ab 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,7 +472,7 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (key.state.extended_dynamic_state) { + if (!key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; From 8fb204893430de2d5c30e008e98db313f890f447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 02:43:01 -0300 Subject: [PATCH 557/770] vk_rasterizer: Exit render passes on fragment barriers --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 855c17769..c57e16c50 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -440,6 +440,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { From 7ac55c2a750f00b41582a86eba5a44dcd781ae98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 17:00:07 -0300 Subject: [PATCH 558/770] shader: Fix loop safety to SSA pass --- .../frontend/maxwell/structured_control_flow.cpp | 4 +++- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b2b8c492a..605ec38e1 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -750,7 +750,9 @@ private: } else { IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + static constexpr u32 SAFETY_THRESHOLD = 0x2000; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { current_block->AddBranch(init_block); diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a4ba393ef..fff25c4a2 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -338,7 +338,7 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; case 
IR::Opcode::SetLoopSafetyVariable: - pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); From 3d822faea1af9cab2e58fcd9edcec09940e290a4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 17:49:33 -0300 Subject: [PATCH 559/770] spirv: Reduce log severity of mismatching denorm rules --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 14a99750d..fd59b4d0a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -294,7 +294,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - LOG_ERROR(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + LOG_WARNING(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -315,7 +315,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - LOG_ERROR(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + LOG_WARNING(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); From eaff1030de07f3739794207403ea833ee91c0034 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 21:58:32 -0400 Subject: [PATCH 560/770] glsl: Initial backend --- src/shader_recompiler/CMakeLists.txt | 26 + .../backend/glsl/emit_context.cpp | 30 + .../backend/glsl/emit_context.h | 62 + .../backend/glsl/emit_glsl.cpp | 156 ++ .../backend/glsl/emit_glsl.h | 23 + .../backend/glsl/emit_glsl_atomic.cpp | 0 .../backend/glsl/emit_glsl_barriers.cpp | 0 .../glsl/emit_glsl_bitwise_conversion.cpp | 0 .../backend/glsl/emit_glsl_composite.cpp | 0 .../glsl/emit_glsl_context_get_set.cpp | 48 + .../backend/glsl/emit_glsl_control_flow.cpp | 0 .../backend/glsl/emit_glsl_convert.cpp | 0 .../backend/glsl/emit_glsl_floating_point.cpp | 0 .../backend/glsl/emit_glsl_image.cpp | 0 .../backend/glsl/emit_glsl_image_atomic.cpp | 0 .../backend/glsl/emit_glsl_instructions.h | 656 +++++ .../backend/glsl/emit_glsl_integer.cpp | 0 .../backend/glsl/emit_glsl_logical.cpp | 0 .../backend/glsl/emit_glsl_memory.cpp | 0 .../glsl/emit_glsl_not_implemented.cpp | 2149 +++++++++++++++++ .../backend/glsl/emit_glsl_select.cpp | 0 .../backend/glsl/emit_glsl_shared_memory.cpp | 0 .../backend/glsl/emit_glsl_special.cpp | 0 .../backend/glsl/emit_glsl_undefined.cpp | 0 .../backend/glsl/emit_glsl_warp.cpp | 0 .../backend/glsl/reg_alloc.cpp | 96 + .../backend/glsl/reg_alloc.h | 46 + .../renderer_opengl/gl_shader_cache.cpp | 7 +- 28 files changed, 3297 insertions(+), 2 deletions(-) create mode 100644 src/shader_recompiler/backend/glsl/emit_context.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_context.h 
create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_instructions.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_select.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_special.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d6d8e5f59..9b2240931 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -24,6 +24,32 @@ add_library(shader_recompiler STATIC backend/glasm/emit_glasm_warp.cpp backend/glasm/reg_alloc.cpp backend/glasm/reg_alloc.h + backend/glsl/emit_context.cpp + backend/glsl/emit_context.h + backend/glsl/emit_glsl.cpp + backend/glsl/emit_glsl.h + backend/glsl/emit_glsl_atomic.cpp + backend/glsl/emit_glsl_barriers.cpp + backend/glsl/emit_glsl_bitwise_conversion.cpp + backend/glsl/emit_glsl_composite.cpp + backend/glsl/emit_glsl_context_get_set.cpp + backend/glsl/emit_glsl_control_flow.cpp + backend/glsl/emit_glsl_convert.cpp + backend/glsl/emit_glsl_floating_point.cpp + backend/glsl/emit_glsl_image.cpp + backend/glsl/emit_glsl_image_atomic.cpp + backend/glsl/emit_glsl_instructions.h + backend/glsl/emit_glsl_integer.cpp + backend/glsl/emit_glsl_logical.cpp + backend/glsl/emit_glsl_memory.cpp + backend/glsl/emit_glsl_not_implemented.cpp + backend/glsl/emit_glsl_select.cpp + backend/glsl/emit_glsl_shared_memory.cpp + backend/glsl/emit_glsl_special.cpp + backend/glsl/emit_glsl_undefined.cpp + backend/glsl/emit_glsl_warp.cpp + backend/glsl/reg_alloc.cpp + backend/glsl/reg_alloc.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..e2a9885f0 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed 
under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::GLSL { + +EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindings, + const Profile& profile_) + : info{program.info}, profile{profile_} { + std::string header = "#version 450 core\n"; + header += "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;"; + code += header; + DefineConstantBuffers(); + code += "void main(){"; +} + +void EmitContext::DefineConstantBuffers() { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + Add("uniform uint c{}[{}];", desc.index, desc.count); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..ffc97007d --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + + template + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(format_str, reg_alloc.Define(inst), std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + template + void Add(const char* format_str, Args&&... args) { + code += fmt::format(format_str, std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string AllocVar() { + return fmt::format("var_{}", var_num++); + } + + std::string code; + RegAlloc reg_alloc; + const Info& info; + const Profile& profile; + u64 var_num{}; + +private: + void DefineConstantBuffers(); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..bb1d8b272 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,156 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +#pragma optimize("", off) +namespace Shader::Backend::GLSL { +namespace { +template +struct FuncTraits {}; + +template +struct FuncTraits { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template + using ArgType = std::tuple_element_t>; +}; + +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... 
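/* Dispatch sketch (how the templates above and below cooperate):
   EmitInst switches on inst->GetOpcode() and instantiates one
   Invoke<&EmitName> per entry of opcodes.inc. FuncTraits recovers the
   emitter's parameter list, and Arg() converts each IR::Value operand
   to match it, so a declaration such as

     void EmitIAdd32(EmitContext& ctx, IR::Inst* inst,
                     std::string_view a, std::string_view b);

   receives its operands already consumed through the register
   allocator, while emitters that declare IR::Value parameters see the
   raw values untouched. */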
args) { + inst->SetDefinition(func(ctx, std::forward(args)...)); +} + +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.reg_alloc.Consume(arg); + } else if constexpr (std::is_same_v) { + return *arg.Inst(); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); + } else if constexpr (std::is_same_v) { + return arg.Reg(); + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if ("); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("){{"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("while ("); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + default: + ctx.Add("UNHANDLED {}", node.type); + break; + } + } +} + +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings, profile}; + // ctx.SetupBuffers(); + EmitCode(ctx, program); + ctx.code += "}"; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..a7c666107 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any
later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, IR::Program& program, + Bindings& binding); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..7b2ed358e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + const auto var{ctx.AllocVar()}; + ctx.Add("uint {} = c{}[{}];", var, binding.U32(), offset.U32()); +} + +void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp 
b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..1d86820dc --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,656 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext; + +inline void EmitSetLoopSafetyVariable(EmitContext&) {} +inline void EmitGetLoopSafetyVariable(EmitContext&) {} + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitBranch(EmitContext& ctx, std::string_view label); +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label); +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void 
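/* Constant-buffer sketch: emit_context.cpp declares every descriptor
   as a bare uint array, "uniform uint c{index}[{count}];", which is
   what lets EmitGetCbufU32 lower to a plain indexed read along the
   lines of

     uint var_0 = c0[8];   // hypothetical binding 0, word offset 8

   The sub-word getters declared here (U8/S8/U16/S16) still throw
   NotImplementedException("GLSL") until byte and halfword extraction
   is implemented. */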
EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx); +void EmitYDirection(EmitContext& ctx); +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void 
EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, 
std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx); +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, 
std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, std::string_view value); +void EmitFPSin(EmitContext& ctx, std::string_view value); +void EmitFPCos(EmitContext& ctx, std::string_view value); +void EmitFPExp2(EmitContext& ctx, std::string_view value); +void EmitFPLog2(EmitContext& ctx, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, 
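/* NaN sketch for the comparison family declared below: the FPOrd
   variants are meant to be false whenever either operand is NaN and
   the FPUnord variants true, following IEEE-754 ordered/unordered
   semantics. An unordered-not-equal lowering would therefore look
   roughly like "isnan(a) || isnan(b) || a != b" rather than a bare
   "a != b". */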
std::string_view value); +void EmitFPCeil64(EmitContext& ctx, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, 
std::string_view lhs, std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, std::string_view value); +void EmitINeg64(EmitContext& ctx, std::string_view value); +void EmitIAbs32(EmitContext& ctx, std::string_view value); +void EmitIAbs64(EmitContext& ctx, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, std::string_view value); +void EmitBitCount32(EmitContext& ctx, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, std::string_view value); +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void 
EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void 
EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, std::string_view 
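/* Naming sketch for the conversion family: EmitConvert{Dst}{Src}, with
   the destination type first, mirroring the IR opcode names. So
   EmitConvertS32F32 takes an F32 operand string and produces an S32
   result, roughly "int var = int(value);" in GLSL terms. */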
value); +void EmitConvertS32F64(EmitContext& ctx, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void 
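/* Bindless/bound sketch: the Bindless and Bound image declarations are
   placeholders for IR opcodes that a texture-descriptor pass is
   expected to rewrite into the concrete EmitImageSample... and
   EmitImageFetch forms further down, which is why these variants take
   no operands and can remain unimplemented in the backend. */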
EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& 
ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx); +void EmitVoteAll(EmitContext& ctx, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx); +void EmitSubgroupLtMask(EmitContext& ctx); +void EmitSubgroupLeMask(EmitContext& ctx); +void EmitSubgroupGtMask(EmitContext& ctx); +void EmitSubgroupGeMask(EmitContext& ctx); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file 
mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..8bd40458b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,2149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +static void NotImplemented() { + throw NotImplementedException("GLSL instruction"); +} + +void EmitPhi(EmitContext& ctx, IR::Inst* inst) { + NotImplemented(); +} + +void EmitVoid(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const auto var{ctx.AllocVar()}; + switch (value.Type()) { + case IR::Type::U32: + ctx.Add("uint {}={};", var, value.U32()); + break; + default: + ctx.Add("EmitIdentity {}", value.Type()); + break; + } +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + NotImplemented(); +} + +void EmitReference(EmitContext&) { + NotImplemented(); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { + NotImplemented(); +} + +void EmitBranch(EmitContext& ctx, std::string_view label) { + NotImplemented(); +} + +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label) { + NotImplemented(); +} + +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, + std::string_view continue_label) { + NotImplemented(); +} + +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { + NotImplemented(); +} + +void EmitReturn(EmitContext& ctx) { + NotImplemented(); +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUnreachable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { + NotImplemented(); +} + +void EmitBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPrologue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEpilogue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { 
NotImplemented(); +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + NotImplemented(); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + NotImplemented(); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLocalInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSampleId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIsHelperInvocation(EmitContext& ctx) { + NotImplemented(); +} + +void EmitYDirection(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { + NotImplemented(); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + NotImplemented(); +} + +void EmitUndefU1(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitWriteGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + 
NotImplemented(); +} + +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + // ctx.Add("c{}[{}]={}", binding.U32() + 2, offset.U32(), 16); + ctx.Add("EmitWriteStorage32"); + // ctx.Add("c{}[{}]={}", binding.U32(), offset.U32(), value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} 
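+// Sketch, not from the original patch: once these emitters are handed the +// defining IR::Inst (as later patches do for EmitIdentity and EmitGetCbufU32), +// a composite construct reduces to a GLSL vector constructor named through +// RegAlloc::Define as declared in reg_alloc.h, e.g. the hypothetical +// ctx.Add("{}=uvec2({},{});", ctx.reg_alloc.Define(*inst, 2), e1, e2); +// Until that wiring exists, every composite stub below throws NotImplemented().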
+ +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4(EmitContext& 
ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitBitCastU16F16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastU64F64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF16U16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastF64U64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitFPAbs16(EmitContext& ctx, 
std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPNeg16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSin(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCos(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPExp2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPLog2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void 
EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, 
std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitINeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitINeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void 
EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitReverse32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCount32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void 
EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + 
std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32(EmitContext& 
ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalNot(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8(EmitContext& ctx, 
std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void 
EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + NotImplemented(); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + NotImplemented(); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms) { + NotImplemented(); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod) { + NotImplemented(); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp) { + NotImplemented(); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { 
+ NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitLaneId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitVoteAll(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteAny(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupEqMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void 
EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + NotImplemented(); +} + +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp new file mode 100644 index 000000000..591a87988 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE = "xyzw"; + +std::string Representation(Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spilling"); + } + const u32 num_elements{id.num_elements_minus_one + 1}; + const u32 index{static_cast<u32>(id.index)}; + if (num_elements == 4) { + return fmt::format("R{}", index); + } else { + return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); + } +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? "true" : "false"); + case IR::Type::U32: + return fmt::format("{}", value.U32()); + case IR::Type::F32: + return fmt::format("{}", value.F32()); + case IR::Type::U64: + return fmt::format("{}", value.U64()); + case IR::Type::F64: + return fmt::format("{}", value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { + const Id id{Alloc(num_elements, alignment)}; + inst.SetDefinition(id); + return Representation(id); +} + +std::string RegAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? 
MakeImm(value) : Consume(*value.Inst()); +} + +std::string RegAlloc::Consume(IR::Inst& inst) { + const Id id{inst.Definition()}; + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(id); + } + return Representation(inst.Definition()); +} + +Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (register_use[reg]) { + continue; + } + num_used_registers = std::max(num_used_registers, reg + 1); + register_use[reg] = true; + return Id{ + .base_element = 0, + .num_elements_minus_one = num_elements - 1, + .index = static_cast<u32>(reg), + .is_spill = 0, + .is_condition_code = 0, + }; + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + register_use[id.index] = false; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h new file mode 100644 index 000000000..850a93d6a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> + +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct Id { + u32 base_element : 2; + u32 num_elements_minus_one : 2; + u32 index : 26; + u32 is_spill : 1; + u32 is_condition_code : 1; +}; + +class RegAlloc { +public: + std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + + std::string Consume(const IR::Value& value); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + std::string Consume(IR::Inst& inst); + + Id Alloc(u32 num_elements, u32 alignment); + + void Free(Id id); + + size_t num_used_registers{}; + std::bitset<NUM_REGS> register_use{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9391a4cd9..4387532ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -19,6 +19,7 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -40,6 +41,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; @@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; + const auto code{EmitGLSL(profile, program, binding)}; + OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } @@ -489,7 +492,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& const std::string code{EmitGLASM(profile, 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9391a4cd9..4387532ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -19,6 +19,7 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h"
@@ -40,6 +41,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram;
@@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; + const auto code{EmitGLSL(profile, program, binding)}; + OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } }
@@ -489,7 +492,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { - const std::vector<u32> code{EmitSPIRV(profile, program)}; + const auto code{EmitGLSL(profile, program)}; source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle);
From 64337f004d9249c4408fec75bd1bbcc0f2a1408d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 20 May 2021 23:38:38 -0400 Subject: [PATCH 561/770] glsl: Fix "reg" allocing based on glasm with some tweaks
--- .../backend/glsl/emit_context.cpp | 28 +- .../backend/glsl/emit_context.h | 6 +- .../backend/glsl/emit_glsl.cpp | 2 +- .../glsl/emit_glsl_bitwise_conversion.cpp | 28 + .../glsl/emit_glsl_context_get_set.cpp | 7 +- .../backend/glsl/emit_glsl_instructions.h | 783 ++++++++-------- .../backend/glsl/emit_glsl_memory.cpp | 99 +++ .../glsl/emit_glsl_not_implemented.cpp | 840 ++++++++---------- .../backend/glsl/reg_alloc.cpp | 37 +- .../backend/glsl/reg_alloc.h | 6 + 10 files changed, 938 insertions(+), 898 deletions(-)
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e2a9885f0..b0e46cc07 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -11,19 +11,39 @@ namespace Shader::Backend::GLSL { EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindings, const Profile& profile_) : info{program.info}, profile{profile_} { - std::string header = "#version 450 core\n"; - header += "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;"; + std::string header = "#version 450\n"; + if (program.stage == Stage::Compute) { + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", + program.workgroup_size[0], program.workgroup_size[1], + program.workgroup_size[2]); + } code += header; DefineConstantBuffers(); - code += "void main(){"; + DefineStorageBuffers(); + code += "void main(){\n"; } void EmitContext::DefineConstantBuffers() { if (info.constant_buffer_descriptors.empty()) { return; } + u32 binding{}; for (const auto& desc : info.constant_buffer_descriptors) { - Add("uniform uint c{}[{}];", desc.index, desc.count); + Add("layout(std140,binding={}) uniform cbuf_{}{{uint cbuf{}[];}};", binding, binding, + desc.index, desc.count); + ++binding; + } +} + +void EmitContext::DefineStorageBuffers() { + if (info.storage_buffers_descriptors.empty()) { + return; + } + u32 binding{}; + for (const auto& desc : info.storage_buffers_descriptors) { + Add("layout(std430,binding={}) buffer buff_{}{{uint buff{}[];}};", binding, binding, + desc.cbuf_index, desc.count); + ++binding; } }
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index ffc97007d..8d093a853 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -45,18 +45,14 @@ public: code += '\n'; } - std::string AllocVar() { - return fmt::format("var_{}", var_num++); - } - std::string code; RegAlloc reg_alloc; const Info& info; const Profile& profile; - u64 var_num{}; private: void DefineConstantBuffers(); + void DefineStorageBuffers(); }; } // namespace Shader::Backend::GLSL
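To make the emitted interface blocks concrete, the snippet below expands one constant-buffer and one storage-buffer descriptor at binding 0 through the same format strings DefineConstantBuffers and DefineStorageBuffers use above (a standalone sketch with plain fmt, outside the emulator; note the format strings reference only three arguments, so the trailing desc.count is accepted but silently ignored by fmt):

    #include <fmt/core.h>

    int main() {
        // Constant buffer: std140 uniform block, one per descriptor.
        fmt::print("{}\n", fmt::format(
            "layout(std140,binding={}) uniform cbuf_{}{{uint cbuf{}[];}};", 0, 0, 0));
        // -> layout(std140,binding=0) uniform cbuf_0{uint cbuf0[];};

        // Storage buffer: std430 buffer block.
        fmt::print("{}\n", fmt::format(
            "layout(std430,binding={}) buffer buff_{}{{uint buff{}[];}};", 0, 0, 0));
        // -> layout(std430,binding=0) buffer buff_0{uint buff0[];};
    }

The std140/std430 split mirrors GLSL's layout rules: std140 rounds array strides up to 16 bytes for uniform blocks, while std430, which is only available on shader storage blocks, exposes the uint array tightly packed.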
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index bb1d8b272..77c93146e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -35,7 +35,7 @@ void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { template <typename ArgType> ArgType Arg(EmitContext& ctx, const IR::Value& arg) { - if constexpr (std::is_same_v<ArgType, std::string_view>) { + if constexpr (std::is_same_v<ArgType, std::string>) { return ctx.reg_alloc.Consume(arg); } else if constexpr (std::is_same_v<ArgType, IR::Inst&>) { return *arg.Inst();
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index e69de29bb..7c654e4e7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +static void Alias(IR::Inst& inst, const IR::Value& value) { + if (value.IsImmediate()) { + return; + } + IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + value_inst.DestructiveAddUsage(inst.UseCount()); + value_inst.DestructiveRemoveUsage(); + inst.SetDefinition(value_inst.Definition<Id>()); +} +} // namespace + +void EmitIdentity(EmitContext&, IR::Inst* inst, const IR::Value& value) { + Alias(*inst, value); +} +} // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 7b2ed358e..e45dfa42e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -30,10 +30,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR throw NotImplementedException("GLSL"); } -void EmitGetCbufU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { - const auto var{ctx.AllocVar()}; - ctx.Add("uint {} = c{}[{}];", var, binding.U32(), offset.U32()); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding, + const IR::Value& offset) { + ctx.Add("uint {}=cbuf{}[{}];", *inst, binding.U32(), offset.U32()); } void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 1d86820dc..16e01c81c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -15,6 +15,8 @@ class Inst; class Value; } // namespace Shader::IR +#pragma optimize("", off) + namespace Shader::Backend::GLSL { class EmitContext;
@@ -25,19 +27,19 @@ inline void EmitGetLoopSafetyVariable(EmitContext&) {} // Microinstruction emitters void EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); -void EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitIdentity(EmitContext& ctx, IR::Inst* inst, const IR::Value& value); void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext& ctx,
const IR::Value& phi, const IR::Value& value); -void EmitBranch(EmitContext& ctx, std::string_view label); -void EmitBranchConditional(EmitContext& ctx, std::string_view condition, - std::string_view true_label, std::string_view false_label); -void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); -void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitBranch(EmitContext& ctx, std::string label); +void EmitBranchConditional(EmitContext& ctx, std::string condition, std::string true_label, + std::string false_label); +void EmitLoopMerge(EmitContext& ctx, std::string merge_label, std::string continue_label); +void EmitSelectionMerge(EmitContext& ctx, std::string merge_label); void EmitReturn(EmitContext& ctx); void EmitJoin(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string continue_label); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); @@ -57,20 +59,20 @@ void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding, + const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex); -void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, - std::string_view vertex); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string value, std::string vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string offset, std::string vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string offset, std::string value, + std::string vertex); void EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); -void EmitSetSampleMask(EmitContext& ctx, std::string_view value); -void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string value); +void EmitSetSampleMask(EmitContext& ctx, std::string value); +void EmitSetFragDepth(EmitContext& ctx, std::string value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); @@ -85,8 +87,8 @@ void EmitInvocationId(EmitContext& ctx); void 
EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); -void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitLoadLocal(EmitContext& ctx, std::string word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string word_offset, std::string value); void EmitUndefU1(EmitContext& ctx); void EmitUndefU8(EmitContext& ctx); void EmitUndefU16(EmitContext& ctx); @@ -96,16 +98,16 @@ void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal32(EmitContext& ctx, std::string address); +void EmitLoadGlobal64(EmitContext& ctx, std::string address); +void EmitLoadGlobal128(EmitContext& ctx, std::string address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal32(EmitContext& ctx, std::string address, std::string value); +void EmitWriteGlobal64(EmitContext& ctx, std::string address, std::string value); +void EmitWriteGlobal128(EmitContext& ctx, std::string address, std::string value); void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); @@ -114,72 +116,69 @@ void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Val void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU16(EmitContext& ctx, 
std::string_view offset); -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); -void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + std::string value); +void EmitLoadSharedU8(EmitContext& ctx, std::string offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string offset, std::string value); +void EmitWriteSharedU16(EmitContext& ctx, std::string offset, std::string value); +void EmitWriteSharedU32(EmitContext& ctx, std::string offset, std::string value); +void EmitWriteSharedU64(EmitContext& ctx, std::string offset, std::string value); +void EmitWriteSharedU128(EmitContext& ctx, std::string offset, std::string value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string e1, std::string e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void 
EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string e1, std::string e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string e1, std::string e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string composite, std::string object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); 
void EmitCompositeConstructF64x3(EmitContext& ctx); @@ -187,264 +186,249 @@ void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string composite, std::string object, u32 index); -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectU1(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectU8(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectU16(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectU32(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectU64(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectF16(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectF32(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); +void EmitSelectF64(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value); void EmitBitCastU16F16(EmitContext& ctx); -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); +void EmitBitCastU32F32(EmitContext& ctx, std::string value); void EmitBitCastU64F64(EmitContext& ctx); void EmitBitCastF16U16(EmitContext& ctx); -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); +void EmitBitCastF32U32(EmitContext& ctx, std::string value); void EmitBitCastF64U64(EmitContext& ctx); -void EmitPackUint2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackHalf2x16(EmitContext& ctx, 
std::string_view value); -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, std::string value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string value); +void EmitPackFloat2x16(EmitContext& ctx, std::string value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string value); +void EmitPackHalf2x16(EmitContext& ctx, std::string value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string value); +void EmitPackDouble2x32(EmitContext& ctx, std::string value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void EmitGetInBoundsFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx, std::string_view value); -void EmitFPAbs32(EmitContext& ctx, std::string_view value); -void EmitFPAbs64(EmitContext& ctx, std::string_view value); -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c); -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitFPNeg16(EmitContext& ctx, std::string_view value); -void EmitFPNeg32(EmitContext& ctx, std::string_view value); -void EmitFPNeg64(EmitContext& ctx, std::string_view value); -void EmitFPSin(EmitContext& ctx, std::string_view value); -void EmitFPCos(EmitContext& ctx, std::string_view value); -void EmitFPExp2(EmitContext& ctx, std::string_view value); -void EmitFPLog2(EmitContext& ctx, std::string_view value); -void EmitFPRecip32(EmitContext& ctx, std::string_view value); -void EmitFPRecip64(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); -void EmitFPSqrt(EmitContext& ctx, std::string_view value); -void EmitFPSaturate16(EmitContext& ctx, std::string_view value); -void EmitFPSaturate32(EmitContext& ctx, std::string_view value); -void EmitFPSaturate64(EmitContext& ctx, std::string_view value); -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp64(EmitContext& ctx, 
std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); -void EmitFPFloor16(EmitContext& ctx, std::string_view value); -void EmitFPFloor32(EmitContext& ctx, std::string_view value); -void EmitFPFloor64(EmitContext& ctx, std::string_view value); -void EmitFPCeil16(EmitContext& ctx, std::string_view value); -void EmitFPCeil32(EmitContext& ctx, std::string_view value); -void EmitFPCeil64(EmitContext& ctx, std::string_view value); -void EmitFPTrunc16(EmitContext& ctx, std::string_view value); -void EmitFPTrunc32(EmitContext& ctx, std::string_view value); -void EmitFPTrunc64(EmitContext& ctx, std::string_view value); -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, 
std::string_view rhs); -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPIsNan16(EmitContext& ctx, std::string_view value); -void EmitFPIsNan32(EmitContext& ctx, std::string_view value); -void EmitFPIsNan64(EmitContext& ctx, std::string_view value); -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitINeg32(EmitContext& ctx, std::string_view value); -void EmitINeg64(EmitContext& ctx, std::string_view value); -void EmitIAbs32(EmitContext& ctx, std::string_view value); -void EmitIAbs64(EmitContext& ctx, std::string_view value); -void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); -void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, - std::string_view offset, std::string_view count); -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count); -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count); -void EmitBitReverse32(EmitContext& ctx, std::string_view value); -void EmitBitCount32(EmitContext& ctx, std::string_view value); -void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); -void EmitFindSMsb32(EmitContext& ctx, std::string_view value); -void EmitFindUMsb32(EmitContext& ctx, std::string_view value); -void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, - std::string_view max); -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, 
std::string_view min, - std::string_view max); -void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value); +void EmitFPAbs16(EmitContext& ctx, std::string value); +void EmitFPAbs32(EmitContext& ctx, std::string value); +void EmitFPAbs64(EmitContext& ctx, std::string value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); +void EmitFPMax32(EmitContext& ctx, std::string a, std::string b); +void EmitFPMax64(EmitContext& ctx, std::string a, std::string b); +void EmitFPMin32(EmitContext& ctx, std::string a, std::string b); +void EmitFPMin64(EmitContext& ctx, std::string a, std::string b); +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPNeg16(EmitContext& ctx, std::string value); +void EmitFPNeg32(EmitContext& ctx, std::string value); +void 
EmitFPNeg64(EmitContext& ctx, std::string value); +void EmitFPSin(EmitContext& ctx, std::string value); +void EmitFPCos(EmitContext& ctx, std::string value); +void EmitFPExp2(EmitContext& ctx, std::string value); +void EmitFPLog2(EmitContext& ctx, std::string value); +void EmitFPRecip32(EmitContext& ctx, std::string value); +void EmitFPRecip64(EmitContext& ctx, std::string value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string value); +void EmitFPSqrt(EmitContext& ctx, std::string value); +void EmitFPSaturate16(EmitContext& ctx, std::string value); +void EmitFPSaturate32(EmitContext& ctx, std::string value); +void EmitFPSaturate64(EmitContext& ctx, std::string value); +void EmitFPClamp16(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value); +void EmitFPClamp32(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value); +void EmitFPClamp64(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string value); +void EmitFPRoundEven32(EmitContext& ctx, std::string value); +void EmitFPRoundEven64(EmitContext& ctx, std::string value); +void EmitFPFloor16(EmitContext& ctx, std::string value); +void EmitFPFloor32(EmitContext& ctx, std::string value); +void EmitFPFloor64(EmitContext& ctx, std::string value); +void EmitFPCeil16(EmitContext& ctx, std::string value); +void EmitFPCeil32(EmitContext& ctx, std::string value); +void EmitFPCeil64(EmitContext& ctx, std::string value); +void EmitFPTrunc16(EmitContext& ctx, std::string value); +void EmitFPTrunc32(EmitContext& ctx, std::string value); +void EmitFPTrunc64(EmitContext& ctx, std::string value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs); +void 
EmitFPUnordGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string value); +void EmitFPIsNan32(EmitContext& ctx, std::string value); +void EmitFPIsNan64(EmitContext& ctx, std::string value); +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitIAdd64(EmitContext& ctx, std::string a, std::string b); +void EmitISub32(EmitContext& ctx, std::string a, std::string b); +void EmitISub64(EmitContext& ctx, std::string a, std::string b); +void EmitIMul32(EmitContext& ctx, std::string a, std::string b); +void EmitINeg32(EmitContext& ctx, std::string value); +void EmitINeg64(EmitContext& ctx, std::string value); +void EmitIAbs32(EmitContext& ctx, std::string value); +void EmitIAbs64(EmitContext& ctx, std::string value); +void EmitShiftLeftLogical32(EmitContext& ctx, std::string base, std::string shift); +void EmitShiftLeftLogical64(EmitContext& ctx, std::string base, std::string shift); +void EmitShiftRightLogical32(EmitContext& ctx, std::string base, std::string shift); +void EmitShiftRightLogical64(EmitContext& ctx, std::string base, std::string shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string base, std::string shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string base, std::string shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitBitFieldInsert(EmitContext& ctx, std::string base, std::string insert, std::string offset, + std::string count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, + std::string count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, + std::string count); +void EmitBitReverse32(EmitContext& ctx, std::string value); +void EmitBitCount32(EmitContext& ctx, std::string value); +void EmitBitwiseNot32(EmitContext& ctx, std::string value); +void EmitFindSMsb32(EmitContext& ctx, std::string value); +void EmitFindUMsb32(EmitContext& ctx, std::string value); +void EmitSMin32(EmitContext& ctx, std::string a, std::string b); +void EmitUMin32(EmitContext& ctx, std::string a, std::string b); +void EmitSMax32(EmitContext& ctx, 
std::string a, std::string b); +void EmitUMax32(EmitContext& ctx, std::string a, std::string b); +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, + std::string max); +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, + std::string max); +void EmitSLessThan(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitULessThan(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitIEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitSLessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitULessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitSGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitUGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitINotEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicXor32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string pointer_offset, std::string value); void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void 
EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value); + const IR::Value& offset, std::string value); void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value); + const IR::Value& offset, std::string value); void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); + std::string value); void EmitGlobalAtomicIAdd32(EmitContext& ctx); void EmitGlobalAtomicSMin32(EmitContext& ctx); void EmitGlobalAtomicUMin32(EmitContext& ctx); @@ -474,58 +458,58 @@ void EmitGlobalAtomicMinF16x2(EmitContext& ctx); void EmitGlobalAtomicMinF32x2(EmitContext& ctx); void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalNot(EmitContext& ctx, std::string_view value); -void EmitConvertS16F16(EmitContext& ctx, std::string_view value); -void EmitConvertS16F32(EmitContext& ctx, std::string_view value); -void EmitConvertS16F64(EmitContext& ctx, 
std::string_view value); -void EmitConvertS32F16(EmitContext& ctx, std::string_view value); -void EmitConvertS32F32(EmitContext& ctx, std::string_view value); -void EmitConvertS32F64(EmitContext& ctx, std::string_view value); -void EmitConvertS64F16(EmitContext& ctx, std::string_view value); -void EmitConvertS64F32(EmitContext& ctx, std::string_view value); -void EmitConvertS64F64(EmitContext& ctx, std::string_view value); -void EmitConvertU16F16(EmitContext& ctx, std::string_view value); -void EmitConvertU16F32(EmitContext& ctx, std::string_view value); -void EmitConvertU16F64(EmitContext& ctx, std::string_view value); -void EmitConvertU32F16(EmitContext& ctx, std::string_view value); -void EmitConvertU32F32(EmitContext& ctx, std::string_view value); -void EmitConvertU32F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64F16(EmitContext& ctx, std::string_view value); -void EmitConvertU64F32(EmitContext& ctx, std::string_view value); -void EmitConvertU64F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64U32(EmitContext& ctx, std::string_view value); -void EmitConvertU32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF16F32(EmitContext& ctx, std::string_view value); -void EmitConvertF32F16(EmitContext& ctx, std::string_view value); -void EmitConvertF32F64(EmitContext& ctx, std::string_view value); -void EmitConvertF64F32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S8(EmitContext& ctx, std::string_view value); -void EmitConvertF16S16(EmitContext& ctx, std::string_view value); -void EmitConvertF16S32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S64(EmitContext& ctx, std::string_view value); -void EmitConvertF16U8(EmitContext& ctx, std::string_view value); -void EmitConvertF16U16(EmitContext& ctx, std::string_view value); -void EmitConvertF16U32(EmitContext& ctx, std::string_view value); -void EmitConvertF16U64(EmitContext& ctx, std::string_view value); -void EmitConvertF32S8(EmitContext& ctx, std::string_view value); -void EmitConvertF32S16(EmitContext& ctx, std::string_view value); -void EmitConvertF32S32(EmitContext& ctx, std::string_view value); -void EmitConvertF32S64(EmitContext& ctx, std::string_view value); -void EmitConvertF32U8(EmitContext& ctx, std::string_view value); -void EmitConvertF32U16(EmitContext& ctx, std::string_view value); -void EmitConvertF32U32(EmitContext& ctx, std::string_view value); -void EmitConvertF32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF64S8(EmitContext& ctx, std::string_view value); -void EmitConvertF64S16(EmitContext& ctx, std::string_view value); -void EmitConvertF64S32(EmitContext& ctx, std::string_view value); -void EmitConvertF64S64(EmitContext& ctx, std::string_view value); -void EmitConvertF64U8(EmitContext& ctx, std::string_view value); -void EmitConvertF64U16(EmitContext& ctx, std::string_view value); -void EmitConvertF64U32(EmitContext& ctx, std::string_view value); -void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitLogicalOr(EmitContext& ctx, std::string a, std::string b); +void EmitLogicalAnd(EmitContext& ctx, std::string a, std::string b); +void EmitLogicalXor(EmitContext& ctx, std::string a, std::string b); +void EmitLogicalNot(EmitContext& ctx, std::string value); +void EmitConvertS16F16(EmitContext& ctx, std::string value); +void EmitConvertS16F32(EmitContext& ctx, std::string value); +void EmitConvertS16F64(EmitContext& ctx, std::string value); +void EmitConvertS32F16(EmitContext& ctx, std::string value); 
+void EmitConvertS32F32(EmitContext& ctx, std::string value); +void EmitConvertS32F64(EmitContext& ctx, std::string value); +void EmitConvertS64F16(EmitContext& ctx, std::string value); +void EmitConvertS64F32(EmitContext& ctx, std::string value); +void EmitConvertS64F64(EmitContext& ctx, std::string value); +void EmitConvertU16F16(EmitContext& ctx, std::string value); +void EmitConvertU16F32(EmitContext& ctx, std::string value); +void EmitConvertU16F64(EmitContext& ctx, std::string value); +void EmitConvertU32F16(EmitContext& ctx, std::string value); +void EmitConvertU32F32(EmitContext& ctx, std::string value); +void EmitConvertU32F64(EmitContext& ctx, std::string value); +void EmitConvertU64F16(EmitContext& ctx, std::string value); +void EmitConvertU64F32(EmitContext& ctx, std::string value); +void EmitConvertU64F64(EmitContext& ctx, std::string value); +void EmitConvertU64U32(EmitContext& ctx, std::string value); +void EmitConvertU32U64(EmitContext& ctx, std::string value); +void EmitConvertF16F32(EmitContext& ctx, std::string value); +void EmitConvertF32F16(EmitContext& ctx, std::string value); +void EmitConvertF32F64(EmitContext& ctx, std::string value); +void EmitConvertF64F32(EmitContext& ctx, std::string value); +void EmitConvertF16S8(EmitContext& ctx, std::string value); +void EmitConvertF16S16(EmitContext& ctx, std::string value); +void EmitConvertF16S32(EmitContext& ctx, std::string value); +void EmitConvertF16S64(EmitContext& ctx, std::string value); +void EmitConvertF16U8(EmitContext& ctx, std::string value); +void EmitConvertF16U16(EmitContext& ctx, std::string value); +void EmitConvertF16U32(EmitContext& ctx, std::string value); +void EmitConvertF16U64(EmitContext& ctx, std::string value); +void EmitConvertF32S8(EmitContext& ctx, std::string value); +void EmitConvertF32S16(EmitContext& ctx, std::string value); +void EmitConvertF32S32(EmitContext& ctx, std::string value); +void EmitConvertF32S64(EmitContext& ctx, std::string value); +void EmitConvertF32U8(EmitContext& ctx, std::string value); +void EmitConvertF32U16(EmitContext& ctx, std::string value); +void EmitConvertF32U32(EmitContext& ctx, std::string value); +void EmitConvertF32U64(EmitContext& ctx, std::string value); +void EmitConvertF64S8(EmitContext& ctx, std::string value); +void EmitConvertF64S16(EmitContext& ctx, std::string value); +void EmitConvertF64S32(EmitContext& ctx, std::string value); +void EmitConvertF64S64(EmitContext& ctx, std::string value); +void EmitConvertF64U8(EmitContext& ctx, std::string value); +void EmitConvertF64U16(EmitContext& ctx, std::string value); +void EmitConvertF64U32(EmitContext& ctx, std::string value); +void EmitConvertF64U64(EmitContext& ctx, std::string value); void EmitBindlessImageSampleImplicitLod(EmitContext&); void EmitBindlessImageSampleExplicitLod(EmitContext&); void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); @@ -551,36 +535,31 @@ void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset); + std::string coords, std::string bias_lc, const IR::Value& offset); void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset); + std::string coords, std::string lod_lc, const IR::Value& offset); void 
EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset); + std::string coords, std::string dref, std::string bias_lc, + const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2); + std::string coords, std::string dref, std::string lod_lc, + const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + const IR::Value& offset, const IR::Value& offset2); void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms); + std::string coords, const IR::Value& offset, const IR::Value& offset2, + std::string dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string offset, std::string lod, std::string ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view lod); + std::string lod); void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view color); + std::string coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string derivates, std::string offset, std::string lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); @@ -604,53 +583,49 @@ void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, 
std::string_view value); + std::string coords, std::string value); void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value); + std::string coords, std::string value); void EmitLaneId(EmitContext& ctx); -void EmitVoteAll(EmitContext& ctx, std::string_view pred); -void EmitVoteAny(EmitContext& ctx, std::string_view pred); -void EmitVoteEqual(EmitContext& ctx, std::string_view pred); -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitVoteAll(EmitContext& ctx, std::string pred); +void EmitVoteAny(EmitContext& ctx, std::string pred); +void EmitVoteEqual(EmitContext& ctx, std::string pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string pred); void EmitSubgroupEqMask(EmitContext& ctx); void EmitSubgroupLtMask(EmitContext& ctx); void EmitSubgroupLeMask(EmitContext& ctx); void EmitSubgroupGtMask(EmitContext& ctx); void EmitSubgroupGeMask(EmitContext& ctx); -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask); -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, - std::string_view swizzle); -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask); +void 
EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index,
+                          std::string clamp, std::string segmentation_mask);
+void EmitFSwizzleAdd(EmitContext& ctx, std::string op_a, std::string op_b, std::string swizzle);
+void EmitDPdxFine(EmitContext& ctx, std::string op_a);
+void EmitDPdyFine(EmitContext& ctx, std::string op_a);
+void EmitDPdxCoarse(EmitContext& ctx, std::string op_a);
+void EmitDPdyCoarse(EmitContext& ctx, std::string op_a);
 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index e69de29bb..8cdb9abfa 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -0,0 +1,99 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+
+#include "shader_recompiler/backend/glsl/emit_context.h"
+#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
+
+namespace Shader::Backend::GLSL {
+void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding,
+                       [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding,
+                       [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorage32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding,
+                       [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorage64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding,
+                       [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitLoadStorage128([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset,
+                        [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset,
+                        [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx,
+                         [[maybe_unused]] const IR::Value& binding,
+                         [[maybe_unused]] const IR::Value& offset,
+                         [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
+                         [[maybe_unused]] const IR::Value& binding,
+                         [[maybe_unused]] const IR::Value& offset,
+                         [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorage32([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset,
+                        [[maybe_unused]] std::string value) {
+    ctx.Add("buff{}[{}]={};", binding.U32(), offset.U32(), value);
+}
+
+void EmitWriteStorage64([[maybe_unused]] EmitContext& ctx,
+                        [[maybe_unused]] const IR::Value& binding,
+                        [[maybe_unused]] const IR::Value& offset,
+                        [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+
+void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
+                         [[maybe_unused]] const IR::Value& binding,
+                         [[maybe_unused]] const IR::Value& offset,
+                         [[maybe_unused]] std::string value) {
+    throw NotImplementedException("GLSL Instruction");
+}
+} // namespace Shader::Backend::GLSL
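For context on the one case implemented above: EmitWriteStorage32 lowers an IR storage write by splicing the constant binding index and word offset into a named SSBO array in the generated GLSL source. A minimal standalone sketch of that string building follows; it assumes ctx.Add formats like fmt::format and appends the result to the shader source, and the "buff" naming plus the operand values are illustrative, taken only from the hunk above rather than from the full yuzu API.

#include <fmt/format.h>
#include <string>

int main() {
    std::string code;
    const unsigned binding = 0;     // hypothetical constant SSBO binding (from IR::Value::U32())
    const unsigned offset = 4;      // hypothetical constant word offset (from IR::Value::U32())
    const std::string value = "R0"; // name of the GLSL variable holding the value to store
    // Roughly the effect of ctx.Add("buff{}[{}]={};", binding.U32(), offset.U32(), value):
    code += fmt::format("buff{}[{}]={};\n", binding, offset, value);
    fmt::print("{}", code); // prints: buff0[4]=R0;
}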
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
index 8bd40458b..cbac59ff0 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -27,18 +27,6 @@ void EmitVoid(EmitContext& ctx) {
     NotImplemented();
 }
 
-void EmitIdentity(EmitContext& ctx, const IR::Value& value) {
-    const auto var{ctx.AllocVar()};
-    switch (value.Type()) {
-    case IR::Type::U32:
-        ctx.Add("uint {}={};", var, value.U32());
-        break;
-    default:
-        ctx.Add("EmitIdentity {}", value.Type());
-        break;
-    }
-}
-
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
     NotImplemented();
 }
@@ -51,21 +39,20 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value)
     NotImplemented();
 }
 
-void EmitBranch(EmitContext& ctx, std::string_view label) {
+void EmitBranch(EmitContext& ctx, std::string label) {
     NotImplemented();
 }
 
-void EmitBranchConditional(EmitContext& ctx, std::string_view condition,
-                           std::string_view true_label, std::string_view false_label) {
+void EmitBranchConditional(EmitContext& ctx, std::string condition, std::string true_label,
+                           std::string false_label) {
     NotImplemented();
 }
 
-void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label,
-                   std::string_view continue_label) {
+void EmitLoopMerge(EmitContext& ctx, std::string merge_label, std::string continue_label) {
     NotImplemented();
 }
 
-void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) {
+void EmitSelectionMerge(EmitContext& ctx, std::string merge_label) {
     NotImplemented();
 }
 
@@ -81,7 +68,7 @@ void EmitUnreachable(EmitContext& ctx) {
     NotImplemented();
 }
 
-void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) {
+void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string continue_label) {
     NotImplemented();
 }
 
@@ -145,21 +132,20 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) {
     NotImplemented();
 }
 
-void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) {
+void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string vertex) {
     NotImplemented();
 }
 
-void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
-                      std::string_view vertex) {
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string value, std::string vertex) {
     NotImplemented();
 }
 
-void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) {
+void EmitGetAttributeIndexed(EmitContext& ctx, std::string offset, std::string vertex) {
NotImplemented(); } -void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, - std::string_view vertex) { +void EmitSetAttributeIndexed(EmitContext& ctx, std::string offset, std::string value, + std::string vertex) { NotImplemented(); } @@ -167,19 +153,19 @@ void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { NotImplemented(); } -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string value) { NotImplemented(); } -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string value) { NotImplemented(); } -void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { +void EmitSetSampleMask(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { +void EmitSetFragDepth(EmitContext& ctx, std::string value) { NotImplemented(); } @@ -239,11 +225,11 @@ void EmitYDirection(EmitContext& ctx) { NotImplemented(); } -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { +void EmitLoadLocal(EmitContext& ctx, std::string word_offset) { NotImplemented(); } -void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { +void EmitWriteLocal(EmitContext& ctx, std::string word_offset, std::string value) { NotImplemented(); } @@ -283,15 +269,15 @@ void EmitLoadGlobalS16(EmitContext& ctx) { NotImplemented(); } -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { +void EmitLoadGlobal32(EmitContext& ctx, std::string address) { NotImplemented(); } -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { +void EmitLoadGlobal64(EmitContext& ctx, std::string address) { NotImplemented(); } -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { +void EmitLoadGlobal128(EmitContext& ctx, std::string address) { NotImplemented(); } @@ -311,250 +297,182 @@ void EmitWriteGlobalS16(EmitContext& ctx) { NotImplemented(); } -void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { +void EmitWriteGlobal32(EmitContext& ctx, std::string address, std::string value) { NotImplemented(); } -void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { +void EmitWriteGlobal64(EmitContext& ctx, std::string address, std::string value) { NotImplemented(); } -void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { +void EmitWriteGlobal128(EmitContext& ctx, std::string address, std::string value) { NotImplemented(); } -void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedU8(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedS8(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedU16(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedS16(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void 
EmitLoadSharedU32(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedU64(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +void EmitLoadSharedU128(EmitContext& ctx, std::string offset) { NotImplemented(); } -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitWriteSharedU8(EmitContext& ctx, std::string offset, std::string value) { NotImplemented(); } -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitWriteSharedU16(EmitContext& ctx, std::string offset, std::string value) { NotImplemented(); } -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitWriteSharedU32(EmitContext& ctx, std::string offset, std::string value) { NotImplemented(); } -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitWriteSharedU64(EmitContext& ctx, std::string offset, std::string value) { NotImplemented(); } -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - // ctx.Add("c{}[{}]={}", binding.U32() + 2, offset.U32(), 16); - ctx.Add("EmitWriteStorage32"); - // ctx.Add("c{}[{}]={}", binding.U32(), offset.U32(), value); -} - -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitWriteSharedU128(EmitContext& ctx, std::string offset, std::string value) { NotImplemented(); } -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string e1, std::string e2) { NotImplemented(); } -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) { NotImplemented(); } -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4) { NotImplemented(); } -void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} 
- -void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { - NotImplemented(); -} - -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { - NotImplemented(); -} - -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { - NotImplemented(); -} - -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string e1, std::string e2) { NotImplemented(); } -void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) { NotImplemented(); } -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4) { NotImplemented(); } -void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, 
std::string_view object, +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string e1, std::string e2) { NotImplemented(); } -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) { NotImplemented(); } -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, + std::string e4) { NotImplemented(); } -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string composite, u32 index) { NotImplemented(); } -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } @@ -583,58 +501,58 @@ void EmitCompositeExtractF64x4(EmitContext& ctx) { NotImplemented(); } -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string composite, std::string object, u32 index) { NotImplemented(); } -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectU1(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectU8(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void 
EmitSelectU16(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectU32(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectU64(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectF16(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectF32(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { +void EmitSelectF64(EmitContext& ctx, std::string cond, std::string true_value, + std::string false_value) { NotImplemented(); } @@ -642,7 +560,7 @@ void EmitBitCastU16F16(EmitContext& ctx) { NotImplemented(); } -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { +void EmitBitCastU32F32(EmitContext& ctx, std::string value) { NotImplemented(); } @@ -654,7 +572,7 @@ void EmitBitCastF16U16(EmitContext& ctx) { NotImplemented(); } -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { +void EmitBitCastF32U32(EmitContext& ctx, std::string value) { NotImplemented(); } @@ -662,35 +580,35 @@ void EmitBitCastF64U64(EmitContext& ctx) { NotImplemented(); } -void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { +void EmitPackUint2x32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { +void EmitUnpackUint2x32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { +void EmitPackFloat2x16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { +void EmitUnpackFloat2x16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { +void EmitPackHalf2x16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { +void EmitUnpackHalf2x16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { +void EmitPackDouble2x32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { +void EmitUnpackDouble2x32(EmitContext& ctx, std::string value) { NotImplemented(); } @@ -718,717 +636,702 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitFPAbs16(EmitContext& ctx, std::string_view value) { +void EmitFPAbs16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPAbs32(EmitContext& ctx, std::string_view value) { +void EmitFPAbs32(EmitContext& ctx, std::string value) { NotImplemented(); } -void 
EmitFPAbs64(EmitContext& ctx, std::string_view value) { +void EmitFPAbs64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { NotImplemented(); } -void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { NotImplemented(); } -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, - std::string_view c) { +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { NotImplemented(); } -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitFPMax32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitFPMax64(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitFPMin32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitFPMin64(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPNeg16(EmitContext& ctx, std::string_view value) { +void EmitFPNeg16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPNeg32(EmitContext& ctx, std::string_view value) { +void EmitFPNeg32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPNeg64(EmitContext& ctx, std::string_view value) { +void EmitFPNeg64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPSin(EmitContext& ctx, std::string_view value) { +void EmitFPSin(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPCos(EmitContext& ctx, std::string_view value) { +void EmitFPCos(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPExp2(EmitContext& ctx, std::string_view value) { +void EmitFPExp2(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPLog2(EmitContext& ctx, 
std::string_view value) { +void EmitFPLog2(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRecip32(EmitContext& ctx, std::string_view value) { +void EmitFPRecip32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRecip64(EmitContext& ctx, std::string_view value) { +void EmitFPRecip64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { +void EmitFPRecipSqrt32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { +void EmitFPRecipSqrt64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPSqrt(EmitContext& ctx, std::string_view value) { +void EmitFPSqrt(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { +void EmitFPSaturate16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { +void EmitFPSaturate32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { +void EmitFPSaturate64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { +void EmitFPClamp16(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value) { NotImplemented(); } -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { +void EmitFPClamp32(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value) { NotImplemented(); } -void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { +void EmitFPClamp64(EmitContext& ctx, std::string value, std::string min_value, + std::string max_value) { NotImplemented(); } -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { +void EmitFPRoundEven16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { +void EmitFPRoundEven32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { +void EmitFPRoundEven64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPFloor16(EmitContext& ctx, std::string_view value) { +void EmitFPFloor16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPFloor32(EmitContext& ctx, std::string_view value) { +void EmitFPFloor32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPFloor64(EmitContext& ctx, std::string_view value) { +void EmitFPFloor64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPCeil16(EmitContext& ctx, std::string_view value) { +void EmitFPCeil16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPCeil32(EmitContext& ctx, std::string_view value) { +void EmitFPCeil32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPCeil64(EmitContext& ctx, std::string_view value) { +void EmitFPCeil64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { +void EmitFPTrunc16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { +void 
EmitFPTrunc32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { +void EmitFPTrunc64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThan16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThan32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThan64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordLessThan16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordLessThan32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void 
EmitFPUnordLessThan64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void 
EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { +void EmitFPIsNan64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitIAdd64(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitISub32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitISub64(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitIMul32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitINeg32(EmitContext& ctx, std::string_view value) { +void EmitINeg32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitINeg64(EmitContext& ctx, std::string_view value) { +void EmitINeg64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitIAbs32(EmitContext& ctx, std::string_view value) { +void EmitIAbs32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitIAbs64(EmitContext& ctx, std::string_view value) { +void EmitIAbs64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftLeftLogical32(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftLeftLogical64(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftRightLogical32(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftRightLogical64(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string base, std::string shift) { NotImplemented(); } -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, 
std::string_view b) { +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { NotImplemented(); } -void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, - std::string_view offset, std::string_view count) { +void EmitBitFieldInsert(EmitContext& ctx, std::string base, std::string insert, std::string offset, + std::string count) { NotImplemented(); } -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count) { +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, + std::string count) { NotImplemented(); } -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, - std::string_view offset, std::string_view count) { +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, + std::string count) { NotImplemented(); } -void EmitBitReverse32(EmitContext& ctx, std::string_view value) { +void EmitBitReverse32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitBitCount32(EmitContext& ctx, std::string_view value) { +void EmitBitCount32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { +void EmitBitwiseNot32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { +void EmitFindSMsb32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { +void EmitFindUMsb32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitSMin32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitUMin32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitSMax32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitUMax32(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, - std::string_view max) { +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, + std::string max) { NotImplemented(); } -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, - std::string_view max) { +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, + std::string max) { NotImplemented(); } -void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitSLessThan(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitULessThan(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitIEqual(EmitContext& ctx, 
std::string lhs, std::string rhs) { NotImplemented(); } -void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitSLessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitULessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitSGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitUGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitINotEqual(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitSGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { +void EmitUGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { NotImplemented(); } -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicInc32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicDec32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicOr32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicXor32(EmitContext& ctx, std::string pointer_offset, 
std::string value) { NotImplemented(); } -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { + const IR::Value& offset, std::string value) { NotImplemented(); } void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { 
NotImplemented(); } void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { + const IR::Value& offset, std::string value) { NotImplemented(); } void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { + std::string value) { NotImplemented(); } @@ -1548,211 +1451,211 @@ void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { NotImplemented(); } -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalOr(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalAnd(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { +void EmitLogicalXor(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitLogicalNot(EmitContext& ctx, std::string_view value) { +void EmitLogicalNot(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS16F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS32F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { +void EmitConvertS64F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { +void EmitConvertS64F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void 
EmitConvertS64F64(EmitContext& ctx, std::string_view value) { +void EmitConvertS64F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU16F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU32F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { +void EmitConvertU64F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { +void EmitConvertU64U32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { +void EmitConvertU32U64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32F16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32F64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64F32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF16S64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF16U32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { +void 
EmitConvertF16U64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32S64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { +void EmitConvertF32U64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { +void EmitConvertF64S64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U8(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U16(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U32(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { +void EmitConvertF64U64(EmitContext& ctx, std::string value) { NotImplemented(); } @@ -1853,69 +1756,64 @@ void EmitBoundImageWrite(EmitContext&) { } void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset) { + std::string coords, std::string bias_lc, const IR::Value& offset) { NotImplemented(); } void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset) { + std::string coords, std::string lod_lc, const IR::Value& offset) { NotImplemented(); } void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset) { + std::string coords, std::string dref, std::string bias_lc, + const IR::Value& offset) { NotImplemented(); } void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, 
const IR::Value& offset) { + std::string coords, std::string dref, std::string lod_lc, + const IR::Value& offset) { NotImplemented(); } -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + const IR::Value& offset, const IR::Value& offset2) { NotImplemented(); } void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref) { + std::string coords, const IR::Value& offset, const IR::Value& offset2, + std::string dref) { NotImplemented(); } -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms) { +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string offset, std::string lod, std::string ms) { NotImplemented(); } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view lod) { + std::string lod) { NotImplemented(); } void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords) { + std::string coords) { NotImplemented(); } -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp) { +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string derivates, std::string offset, std::string lod_clamp) { NotImplemented(); } -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords) { +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords) { NotImplemented(); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, + std::string color) { NotImplemented(); } @@ -2008,57 +1906,57 @@ void EmitBoundImageAtomicExchange32(EmitContext&) { } void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + 
std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - std::string_view coords, std::string_view value) { + std::string coords, std::string value) { NotImplemented(); } @@ -2066,19 +1964,19 @@ void EmitLaneId(EmitContext& ctx) { NotImplemented(); } -void EmitVoteAll(EmitContext& ctx, std::string_view pred) { +void EmitVoteAll(EmitContext& ctx, std::string pred) { NotImplemented(); } -void EmitVoteAny(EmitContext& ctx, std::string_view pred) { +void EmitVoteAny(EmitContext& ctx, std::string pred) { NotImplemented(); } -void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { +void EmitVoteEqual(EmitContext& ctx, std::string pred) { NotImplemented(); } -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { +void EmitSubgroupBallot(EmitContext& ctx, std::string pred) { NotImplemented(); } @@ -2102,47 +2000,43 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask) { +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, + std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, - std::string_view swizzle) { +void EmitFSwizzleAdd(EmitContext& ctx, std::string op_a, std::string op_b, std::string swizzle) { NotImplemented(); } -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { +void EmitDPdxFine(EmitContext& ctx, std::string op_a) { NotImplemented(); } -void 
EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { +void EmitDPdyFine(EmitContext& ctx, std::string op_a) { NotImplemented(); } -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { +void EmitDPdxCoarse(EmitContext& ctx, std::string op_a) { NotImplemented(); } -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { +void EmitDPdyCoarse(EmitContext& ctx, std::string op_a) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 591a87988..5fdad5acb 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -10,7 +10,7 @@ #include "shader_recompiler/backend/glsl/reg_alloc.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/value.h" - +#pragma optimize("", off) namespace Shader::Backend::GLSL { namespace { constexpr std::string_view SWIZZLE = "xyzw"; @@ -24,11 +24,7 @@ std::string Representation(Id id) { } const u32 num_elements{id.num_elements_minus_one + 1}; const u32 index{static_cast<u32>(id.index)}; - if (num_elements == 4) { - return fmt::format("R{}", index); - } else { - return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); - } + return fmt::format("R{}", index); } std::string MakeImm(const IR::Value& value) { @@ -56,7 +52,8 @@ std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { } std::string RegAlloc::Consume(const IR::Value& value) { - return value.IsImmediate() ? MakeImm(value) : Consume(*value.Inst()); + const auto result = value.IsImmediate() ? MakeImm(value) : Consume(*value.InstRecursive()); + return result; } std::string RegAlloc::Consume(IR::Inst& inst) { @@ -93,4 +90,30 @@ void RegAlloc::Free(Id id) { register_use[id.index] = false; } +/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::Identity: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastU32F32: + case IR::Opcode::BitCastU64F64: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::BitCastF32U32: + case IR::Opcode::BitCastF64U64: + return true; + default: + return false; + } +} + +/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { + IR::Inst* it{&inst}; + while (IsAliased(*it)) { + const IR::Value arg{it->Arg(0)}; + if (arg.IsImmediate()) { + break; + } + it = arg.InstRecursive(); + } + return *it; +} } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 850a93d6a..a777cbbd2 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -29,6 +29,12 @@ public: std::string Consume(const IR::Value& value); + /// Returns true if the instruction is expected to be aliased to another + static bool IsAliased(const IR::Inst& inst); + + /// Returns the underlying value out of an alias sequence + static IR::Inst& AliasInst(IR::Inst& inst); + private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; From faf4cd72c556421966782cece604b64fbafa5714 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 01:12:58 -0400 Subject: [PATCH 562/770] glsl: Fix program linking and cbuf --- src/shader_recompiler/backend/glsl/emit_context.cpp | 4 ++-- .../backend/glsl/emit_glsl_context_get_set.cpp | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git
a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index b0e46cc07..8eea6344f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -29,8 +29,8 @@ void EmitContext::DefineConstantBuffers() { } u32 binding{}; for (const auto& desc : info.constant_buffer_descriptors) { - Add("layout(std140,binding={}) uniform cbuf_{}{{uint cbuf{}[];}};", binding, binding, - desc.index, desc.count); + Add("layout(std140,binding={}) uniform cbuf_{}{{vec4 cbuf{}[{}];}};", binding, binding, + desc.index, 4 * 1024); ++binding; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index e45dfa42e..73f0faf35 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -32,7 +32,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding, const IR::Value& offset) { - ctx.Add("uint {}=cbuf{}[{}];", *inst, binding.U32(), offset.U32()); + const auto u32_offset{offset.U32()}; + ctx.Add("uint {}=floatBitsToUint(cbuf{}[{}][{}]);", *inst, binding.U32(), u32_offset / 16, + (u32_offset / 4) % 4); } void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, From e221baccddea3c0802c97e7f6f60c0c3e6a72b60 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 02:00:12 -0400 Subject: [PATCH 563/770] glsl: Reusable typed variables. IADD32 --- .../backend/glsl/emit_context.h | 28 ++- .../glsl/emit_glsl_context_get_set.cpp | 4 +- .../backend/glsl/emit_glsl_integer.cpp | 221 ++++++++++++++++++ .../glsl/emit_glsl_not_implemented.cpp | 173 -------------- .../backend/glsl/reg_alloc.cpp | 52 +++-- .../backend/glsl/reg_alloc.h | 36 ++- 6 files changed, 311 insertions(+), 203 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8d093a853..81b970c14 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -31,9 +31,33 @@ class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + // template <typename... Args> + // void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + // code += fmt::format(format_str, reg_alloc.Define(inst), std::forward<Args>(args)...); + // // TODO: Remove this + // code += '\n'; + // } + template <typename... Args> - void Add(const char* format_str, IR::Inst& inst, Args&&... args) { - code += fmt::format(format_str, reg_alloc.Define(inst), std::forward<Args>(args)...); + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + code += + fmt::format(format_str, reg_alloc.Define(inst, Type::U32), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void AddS32(const char* format_str, IR::Inst& inst, Args&&... args) { + code += + fmt::format(format_str, reg_alloc.Define(inst, Type::S32), std::forward<Args>(args)...); + // TODO: Remove this + code += '\n'; + } + + template <typename... Args> + void AddF32(const char* format_str, IR::Inst& inst, Args&&...
args) { + code += + fmt::format(format_str, reg_alloc.Define(inst, Type::F32), std::forward<Args>(args)...); // TODO: Remove this code += '\n'; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 73f0faf35..ff04cffd2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -33,8 +33,8 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding, const IR::Value& offset) { const auto u32_offset{offset.U32()}; - ctx.Add("uint {}=floatBitsToUint(cbuf{}[{}][{}]);", *inst, binding.U32(), u32_offset / 16, - (u32_offset / 4) % 4); + ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}][{}]);", *inst, binding.U32(), u32_offset / 16, + (u32_offset / 4) % 4); } void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index e69de29bb..6977f74f9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -0,0 +1,221 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + ctx.AddU32("{}={}+{};", *inst, a, b); +} + +void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, + [[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, +
[[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, + [[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, + [[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string base, + [[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string base, + [[maybe_unused]] std::string shift) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, + [[maybe_unused]] std::string insert, [[maybe_unused]] std::string offset, + std::string count) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, + std::string count) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, + std::string count) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw 
NotImplementedException("GLSL Instruction"); +} + +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, + [[maybe_unused]] std::string b) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value, [[maybe_unused]] std::string min, + std::string max) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value, [[maybe_unused]] std::string min, + std::string max) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, + [[maybe_unused]] std::string rhs) { + throw NotImplementedException("GLSL Instruction"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index cbac59ff0..f39c1fff0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -979,179 +979,6 @@ void EmitFPIsNan64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { - NotImplemented(); -} - -void EmitIAdd64(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitISub32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitISub64(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitIMul32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitINeg32(EmitContext& ctx, 
std::string value) { - NotImplemented(); -} - -void EmitINeg64(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitIAbs32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitIAbs64(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitShiftLeftLogical32(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitShiftLeftLogical64(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitShiftRightLogical32(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitShiftRightLogical64(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string base, std::string shift) { - NotImplemented(); -} - -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { - NotImplemented(); -} - -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { - NotImplemented(); -} - -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { - NotImplemented(); -} - -void EmitBitFieldInsert(EmitContext& ctx, std::string base, std::string insert, std::string offset, - std::string count) { - NotImplemented(); -} - -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, - std::string count) { - NotImplemented(); -} - -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, - std::string count) { - NotImplemented(); -} - -void EmitBitReverse32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitBitCount32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitBitwiseNot32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitFindSMsb32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitFindUMsb32(EmitContext& ctx, std::string value) { - NotImplemented(); -} - -void EmitSMin32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitUMin32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitSMax32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitUMax32(EmitContext& ctx, std::string a, std::string b) { - NotImplemented(); -} - -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, - std::string max) { - NotImplemented(); -} - -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, - std::string max) { - NotImplemented(); -} - -void EmitSLessThan(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitULessThan(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitIEqual(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitSLessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitULessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitSGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitUGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitINotEqual(EmitContext& 
ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitSGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - -void EmitUGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs) { - NotImplemented(); -} - void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 5fdad5acb..5ad1872db 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -13,7 +13,6 @@ #pragma optimize("", off) namespace Shader::Backend::GLSL { namespace { -constexpr std::string_view SWIZZLE = "xyzw"; std::string Representation(Id id) { if (id.is_condition_code != 0) { @@ -22,7 +21,6 @@ std::string Representation(Id id) { if (id.is_spill != 0) { throw NotImplementedException("Spilling"); } - const u32 num_elements{id.num_elements_minus_one + 1}; const u32 index{static_cast<u32>(id.index)}; return fmt::format("R{}", index); } @@ -45,10 +43,11 @@ std::string MakeImm(const IR::Value& value) { } } // Anonymous namespace -std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { - const Id id{Alloc(num_elements, alignment)}; +std::string RegAlloc::Define(IR::Inst& inst, Type type) { + const Id id{Alloc()}; + const auto type_str{GetType(type, id.index)}; inst.SetDefinition(id); - return Representation(id); + return type_str + Representation(id); } std::string RegAlloc::Consume(const IR::Value& value) { @@ -65,20 +64,37 @@ std::string RegAlloc::Consume(IR::Inst& inst) { return Representation(inst.Definition()); } -Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { - for (size_t reg = 0; reg < NUM_REGS; ++reg) { - if (register_use[reg]) { - continue; +std::string RegAlloc::GetType(Type type, u32 index) { + if (register_defined[index]) { + return ""; + } + register_defined[index] = true; + switch (type) { + case Type::U32: + return "uint "; + case Type::S32: + return "int "; + case Type::F32: + return "float "; + default: + return ""; + } +} + +Id RegAlloc::Alloc() { + if (num_used_registers < NUM_REGS) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (register_use[reg]) { + continue; + } + register_use[reg] = true; + Id ret{}; + ret.index.Assign(static_cast<u32>(reg)); + ret.is_long.Assign(0); + ret.is_spill.Assign(0); + ret.is_condition_code.Assign(0); + return ret; } - num_used_registers = std::max(num_used_registers, reg + 1); - register_use[reg] = true; - return Id{ - .base_element = 0, - .num_elements_minus_one = num_elements - 1, - .index = static_cast<u32>(reg), - .is_spill = 0, - .is_condition_code = 0, - }; } throw NotImplementedException("Register spilling"); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index a777cbbd2..9b98aab39 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -6,6 +6,7 @@ #include +#include "common/bit_field.h" #include "common/common_types.h" namespace Shader::IR { @@ -14,18 +15,36 @@ class Value; } // namespace Shader::IR namespace Shader::Backend::GLSL { +enum class Type : u32 { + U32, + S32, + F32, + U64, + F64, + Void, +}; struct Id { - u32 base_element : 2; - u32 num_elements_minus_one : 2; - u32 index : 26; - u32 is_spill : 1; - u32 is_condition_code : 1; + union { + u32 raw; + BitField<0, 29, u32> index; + BitField<29, 1,
u32> is_long; + BitField<30, 1, u32> is_spill; + BitField<31, 1, u32> is_condition_code; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } }; +static_assert(sizeof(Id) == sizeof(u32)); class RegAlloc { public: - std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + std::string Define(IR::Inst& inst, Type type = Type::Void); std::string Consume(const IR::Value& value); @@ -40,13 +59,14 @@ private: static constexpr size_t NUM_ELEMENTS = 4; std::string Consume(IR::Inst& inst); + std::string GetType(Type type, u32 index); - Id Alloc(u32 num_elements, u32 alignment); - + Id Alloc(); void Free(Id id); size_t num_used_registers{}; std::bitset<NUM_REGS> register_use{}; + std::bitset<NUM_REGS> register_defined{}; }; } // namespace Shader::Backend::GLSL From 78f5eb90d7c5ecd92c4e8b7f71cf97b57d78dbfd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 02:20:08 -0400 Subject: [PATCH 564/770] glsl: INeg and IAdd negate tests --- .../backend/glsl/emit_glsl_instructions.h | 72 +++++----- .../backend/glsl/emit_glsl_integer.cpp | 129 ++++++++++-------- .../backend/glsl/reg_alloc.cpp | 3 +- 3 files changed, 108 insertions(+), 96 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 16e01c81c..550193dc3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -317,52 +317,54 @@ void EmitFPIsNan16(EmitContext& ctx, std::string value); void EmitFPIsNan32(EmitContext& ctx, std::string value); void EmitFPIsNan64(EmitContext& ctx, std::string value); void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitIAdd64(EmitContext& ctx, std::string a, std::string b); -void EmitISub32(EmitContext& ctx, std::string a, std::string b); -void EmitISub64(EmitContext& ctx, std::string a, std::string b); -void EmitIMul32(EmitContext& ctx, std::string a, std::string b); -void EmitINeg32(EmitContext& ctx, std::string value); -void EmitINeg64(EmitContext& ctx, std::string value); -void EmitIAbs32(EmitContext& ctx, std::string value); -void EmitIAbs64(EmitContext& ctx, std::string value); -void EmitShiftLeftLogical32(EmitContext& ctx, std::string base, std::string shift); -void EmitShiftLeftLogical64(EmitContext& ctx, std::string base, std::string shift); -void EmitShiftRightLogical32(EmitContext& ctx, std::string base, std::string shift); -void EmitShiftRightLogical64(EmitContext& ctx, std::string base, std::string shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, std::string base, std::string shift); -void EmitShiftRightArithmetic64(EmitContext& ctx, std::string base, std::string shift); +void EmitIAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitISub32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitISub64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitIMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitINeg32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitINeg64(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitIAbs32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitIAbs64(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst* inst,
std::string base, std::string shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst* inst, std::string base, + std::string shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst* inst, std::string base, + std::string shift); void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitBitFieldInsert(EmitContext& ctx, std::string base, std::string insert, std::string offset, - std::string count); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst* inst, std::string base, std::string insert, + std::string offset, std::string count); void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, std::string count); void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, std::string count); -void EmitBitReverse32(EmitContext& ctx, std::string value); -void EmitBitCount32(EmitContext& ctx, std::string value); -void EmitBitwiseNot32(EmitContext& ctx, std::string value); -void EmitFindSMsb32(EmitContext& ctx, std::string value); -void EmitFindUMsb32(EmitContext& ctx, std::string value); -void EmitSMin32(EmitContext& ctx, std::string a, std::string b); -void EmitUMin32(EmitContext& ctx, std::string a, std::string b); -void EmitSMax32(EmitContext& ctx, std::string a, std::string b); -void EmitUMax32(EmitContext& ctx, std::string a, std::string b); +void EmitBitReverse32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitBitCount32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst* inst, std::string value); +void EmitSMin32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitUMin32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitSMax32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitUMax32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, std::string max); void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, std::string max); -void EmitSLessThan(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitULessThan(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitIEqual(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitSLessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitULessThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitSGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitUGreaterThan(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitINotEqual(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, std::string lhs, std::string 
rhs); +void EmitSLessThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value); void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value); void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 6977f74f9..f03b2dba9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -16,69 +16,75 @@ void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* in ctx.AddU32("{}={}+{};", *inst, a, b); } -void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { + ctx.AddU32("{}=-{};", *inst, value); +} + +void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + 
[[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { +void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { - throw NotImplementedException("GLSL Instruction"); -} - -void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, +void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, +void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, +void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, +void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); @@ -99,9 +105,9 @@ void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In throw NotImplementedException("GLSL Instruction"); } -void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string base, - [[maybe_unused]] std::string insert, [[maybe_unused]] std::string offset, - std::string count) { +void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string base, [[maybe_unused]] std::string insert, + [[maybe_unused]] std::string offset, std::string count) { throw NotImplementedException("GLSL Instruction"); } @@ -117,43 +123,48 @@ void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR throw NotImplementedException("GLSL Instruction"); } -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void 
EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string value) { +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string a, - [[maybe_unused]] std::string b) { +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } @@ -169,53 +180,53 @@ void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* throw NotImplementedException("GLSL Instruction"); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw 
NotImplementedException("GLSL Instruction"); } -void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string lhs, - [[maybe_unused]] std::string rhs) { +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, + [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 5ad1872db..f4886dbfd 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -51,8 +51,7 @@ std::string RegAlloc::Define(IR::Inst& inst, Type type) { } std::string RegAlloc::Consume(const IR::Value& value) { - const auto result = value.IsImmediate() ? MakeImm(value) : Consume(*value.InstRecursive()); - return result; + return value.IsImmediate() ? 
MakeImm(value) : Consume(*value.InstRecursive());
 }
 
 std::string RegAlloc::Consume(IR::Inst& inst) {

From 115c162b9afacbf36dd6c38ef06ccf71e10c336b Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 21 May 2021 19:28:03 -0400
Subject: [PATCH 565/770] glsl: Pass IR::Inst& to Emit functions

---
 .../backend/glsl/emit_glsl.cpp                |   8 +-
 .../glsl/emit_glsl_bitwise_conversion.cpp     |   4 +-
 .../glsl/emit_glsl_context_get_set.cpp        |   4 +-
 .../backend/glsl/emit_glsl_instructions.h     | 162 +++++++++---------
 .../backend/glsl/emit_glsl_integer.cpp        |  88 +++++-----
 .../glsl/emit_glsl_not_implemented.cpp        |  74 ++++----
 6 files changed, 169 insertions(+), 171 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
index 77c93146e..0f528b027 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -37,8 +37,6 @@ template <typename ArgType>
 ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
     if constexpr (std::is_same_v<ArgType, std::string>) {
         return ctx.reg_alloc.Consume(arg);
-    } else if constexpr (std::is_same_v<ArgType, IR::Inst&>) {
-        return *arg.Inst();
     } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
         return arg;
     } else if constexpr (std::is_same_v<ArgType, u32>) {
@@ -58,7 +56,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
     if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) {
         if constexpr (is_first_arg_inst) {
             SetDefinition<func>(
-                ctx, inst, inst,
+                ctx, inst, *inst,
                 Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
             SetDefinition<func>(
@@ -66,7 +64,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
         }
     } else {
         if constexpr (is_first_arg_inst) {
-            func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
+            func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...);
         } else {
             func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...);
         }
@@ -81,7 +79,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) {
         Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
     } else {
         using FirstArgType = typename Traits::template ArgType<1>;
-        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>;
+        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
         using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
         Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
     }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 7c654e4e7..f40f9900c 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -22,7 +22,7 @@ static void Alias(IR::Inst& inst, const IR::Value& value) {
 }
 } // namespace
 
-void EmitIdentity(EmitContext&, IR::Inst* inst, const IR::Value& value) {
-    Alias(*inst, value);
+void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
+    Alias(inst, value);
 }
 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index ff04cffd2..2f4ecd6a1 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -30,10 +30,10 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR
     throw NotImplementedException("GLSL");
 }
 
-void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding,
+void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto u32_offset{offset.U32()};
-
ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}][{}]);", *inst, binding.U32(), u32_offset / 16, + ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}][{}]);", inst, binding.U32(), u32_offset / 16, (u32_offset / 4) % 4); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 550193dc3..681e0bdb9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -25,9 +25,9 @@ inline void EmitSetLoopSafetyVariable(EmitContext&) {} inline void EmitGetLoopSafetyVariable(EmitContext&) {} // Microinstruction emitters -void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); -void EmitIdentity(EmitContext& ctx, IR::Inst* inst, const IR::Value& value); +void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); @@ -59,7 +59,7 @@ void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& binding, +void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); @@ -231,19 +231,19 @@ void EmitGetInBoundsFromOp(EmitContext& ctx); void EmitFPAbs16(EmitContext& ctx, std::string value); void EmitFPAbs32(EmitContext& ctx, std::string value); void EmitFPAbs64(EmitContext& ctx, std::string value); -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); -void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); void EmitFPMax32(EmitContext& ctx, std::string a, std::string b); void EmitFPMax64(EmitContext& ctx, std::string a, std::string b); void EmitFPMin32(EmitContext& ctx, std::string a, std::string b); void EmitFPMin64(EmitContext& ctx, std::string a, std::string b); -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void 
EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); void EmitFPNeg16(EmitContext& ctx, std::string value); void EmitFPNeg32(EmitContext& ctx, std::string value); void EmitFPNeg64(EmitContext& ctx, std::string value); @@ -316,55 +316,55 @@ void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::strin void EmitFPIsNan16(EmitContext& ctx, std::string value); void EmitFPIsNan32(EmitContext& ctx, std::string value); void EmitFPIsNan64(EmitContext& ctx, std::string value); -void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitIAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitISub32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitISub64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitIMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitINeg32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitINeg64(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitIAbs32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitIAbs64(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); -void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); -void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); -void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst* inst, std::string base, std::string shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst* inst, std::string base, +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst* inst, std::string base, +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitBitwiseOr32(EmitContext& 
ctx, IR::Inst* inst, std::string a, std::string b); -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitBitFieldInsert(EmitContext& ctx, IR::Inst* inst, std::string base, std::string insert, +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string base, std::string insert, std::string offset, std::string count); -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string base, std::string offset, std::string count); -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string base, std::string offset, +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string base, std::string offset, std::string count); -void EmitBitReverse32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitBitCount32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitBitwiseNot32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitFindSMsb32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitFindUMsb32(EmitContext& ctx, IR::Inst* inst, std::string value); -void EmitSMin32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitUMin32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitSMax32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitUMax32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b); -void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string value, std::string min, std::string max); -void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string value, std::string min, +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string value, std::string min, std::string max); -void EmitSLessThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitULessThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitIEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitSLessThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitULessThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitSGreaterThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitUGreaterThan(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitINotEqual(EmitContext& ctx, 
IR::Inst* inst, std::string lhs, std::string rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst* inst, std::string lhs, std::string rhs); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value); void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value); void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value); @@ -536,31 +536,31 @@ void EmitBoundImageQueryLod(EmitContext&); void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string bias_lc, const IR::Value& offset); -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string lod_lc, const IR::Value& offset); -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string dref, std::string bias_lc, const IR::Value& offset); -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string dref, std::string lod_lc, const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, const IR::Value& offset, const IR::Value& offset2); -void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, const IR::Value& offset, const IR::Value& offset2, std::string dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string offset, std::string lod, std::string ms); -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void 
EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string lod); -void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string derivates, std::string offset, std::string lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); @@ -584,27 +584,27 @@ void EmitBoundImageAtomicAnd32(EmitContext&); void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value); -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const 
IR::Value& index, std::string coords, std::string value); void EmitLaneId(EmitContext& ctx); void EmitVoteAll(EmitContext& ctx, std::string pred); @@ -616,13 +616,13 @@ void EmitSubgroupLtMask(EmitContext& ctx); void EmitSubgroupLeMask(EmitContext& ctx); void EmitSubgroupGtMask(EmitContext& ctx); void EmitSubgroupGeMask(EmitContext& ctx); -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask); -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask); -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask); -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask); void EmitFSwizzleAdd(EmitContext& ctx, std::string op_a, std::string op_b, std::string swizzle); void EmitDPdxFine(EmitContext& ctx, std::string op_a); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index f03b2dba9..0dadf1d93 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -11,221 +11,221 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { -void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { - ctx.AddU32("{}={}+{};", *inst, a, b); + ctx.AddU32("{}={}+{};", inst, a, b); } -void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { - ctx.AddU32("{}=-{};", *inst, 
value); + ctx.AddU32("{}=-{};", inst, value); } -void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 
[[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string insert, [[maybe_unused]] std::string offset, std::string count) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, std::string count) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, std::string count) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value) { throw NotImplementedException("GLSL Instruction"); } -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 
[[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { throw NotImplementedException("GLSL Instruction"); } -void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value, [[maybe_unused]] std::string min, std::string max) { throw NotImplementedException("GLSL Instruction"); } -void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string value, [[maybe_unused]] std::string min, std::string max) { throw NotImplementedException("GLSL Instruction"); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst, +void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] IR::Inst* inst, +void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) { throw NotImplementedException("GLSL Instruction"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index f39c1fff0..0775d5c84 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -19,7 +19,7 @@ static void NotImplemented() { throw NotImplementedException("GLSL instruction"); } -void EmitPhi(EmitContext& ctx, IR::Inst* inst) { +void EmitPhi(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } @@ -648,27 +648,27 @@ void EmitFPAbs64(EmitContext& ctx, std::string value) { NotImplemented(); } -void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) { NotImplemented(); } -void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) { NotImplemented(); } -void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b, std::string c) { +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) { NotImplemented(); } @@ -688,15 +688,15 @@ void EmitFPMin64(EmitContext& ctx, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } -void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string a, std::string b) { +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) { NotImplemented(); } @@ -1582,64 +1582,64 @@ void EmitBoundImageWrite(EmitContext&) { NotImplemented(); } -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string bias_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string lod_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& 
ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string dref, std::string bias_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string dref, std::string lod_lc, const IR::Value& offset) { NotImplemented(); } -void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, const IR::Value& offset, const IR::Value& offset2) { NotImplemented(); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, const IR::Value& offset, const IR::Value& offset2, std::string dref) { NotImplemented(); } -void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string offset, std::string lod, std::string ms) { NotImplemented(); } -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string lod) { NotImplemented(); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords) { NotImplemented(); } -void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string derivates, std::string offset, std::string lod_clamp) { NotImplemented(); } -void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords) { +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords) { NotImplemented(); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, std::string coords, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string color) { NotImplemented(); } @@ -1732,57 +1732,57 @@ void EmitBoundImageAtomicExchange32(EmitContext&) { NotImplemented(); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& 
index, +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, std::string value) { NotImplemented(); } @@ -1827,22 +1827,22 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask) { NotImplemented(); } -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string value, std::string index, +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, std::string clamp, std::string segmentation_mask) { NotImplemented(); } From fb75d122a242a5e43d36edc916e16a873f807acd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 19:55:58 -0400 Subject: [PATCH 566/770] glsl: Use std::string_view for Emit function args. 
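Switching these signatures from std::string to std::string_view lets each Emit callee borrow the operand text rather than take its own heap-allocated copy per argument; the owning std::string values produced by RegAlloc::Consume are materialized in the dispatcher and live for the duration of the full call expression, so the borrowed views cannot dangle. A minimal sketch of that lifetime contract follows; Consume and EmitIAdd32Sketch are illustrative stand-ins, not the real yuzu functions:

#include <cstdio>
#include <string>
#include <string_view>

// Illustrative stand-in for RegAlloc::Consume: returns an owned register name.
std::string Consume(int index) {
    return "R" + std::to_string(index);
}

// Emit callee in the new style: it borrows the characters and copies nothing.
void EmitIAdd32Sketch(std::string_view a, std::string_view b) {
    std::printf("%.*s+%.*s;\n", static_cast<int>(a.size()), a.data(),
                static_cast<int>(b.size()), b.data());
}

int main() {
    // The owning strings are created in the caller, as the Invoke dispatcher
    // does, and outlive the call, so the views passed down remain valid.
    const std::string a{Consume(0)};
    const std::string b{Consume(1)};
    EmitIAdd32Sketch(a, b); // prints "R0+R1;"
    return 0;
}

The constraint this places on the backend is that no callee may retain a view past the call; each Emit function formats its text into the context immediately (ctx.Add* in the hunks below), which is what makes borrowing safe here.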
---
 .../backend/glsl/emit_glsl.cpp                |   7 +-
 .../backend/glsl/emit_glsl.h                  |   6 +-
 .../backend/glsl/emit_glsl_instructions.h     | 788 +++++++++---------
 .../backend/glsl/emit_glsl_integer.cpp        | 114 +--
 .../backend/glsl/emit_glsl_memory.cpp         |  14 +-
 .../glsl/emit_glsl_not_implemented.cpp        | 669 ++++++++-------
 6 files changed, 838 insertions(+), 760 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
index 0f528b027..d1c58cefc 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp
@@ -34,8 +34,8 @@ void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
 }
 
 template <typename ArgType>
-ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+auto Arg(EmitContext& ctx, const IR::Value& arg) {
-    if constexpr (std::is_same_v<ArgType, std::string>) {
+    if constexpr (std::is_same_v<ArgType, std::string_view>) {
         return ctx.reg_alloc.Consume(arg);
     } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
         return arg;
@@ -143,7 +143,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) {
 }
 } // Anonymous namespace
 
-std::string EmitGLSL(const Profile& profile, IR::Program& program, Bindings& bindings) {
+std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& program,
+                     Bindings& bindings) {
     EmitContext ctx{program, bindings, profile};
     // ctx.SetupBuffers();
     EmitCode(ctx, program);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h
index a7c666107..fe221fa7c 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl.h
@@ -12,12 +12,12 @@ namespace Shader::Backend::GLSL {
 
-[[nodiscard]] std::string EmitGLSL(const Profile& profile, IR::Program& program,
-                                   Bindings& binding);
+[[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info,
+                                   IR::Program& program, Bindings& bindings);
 
 [[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) {
     Bindings binding;
-    return EmitGLSL(profile, program, binding);
+    return EmitGLSL(profile, {}, program, binding);
 }
 
 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 681e0bdb9..ff0c9cd95 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -31,15 +31,15 @@ void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitReference(EmitContext&);
 void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value);
-void EmitBranch(EmitContext& ctx, std::string label);
-void EmitBranchConditional(EmitContext& ctx, std::string condition, std::string true_label,
-                           std::string false_label);
-void EmitLoopMerge(EmitContext& ctx, std::string merge_label, std::string continue_label);
-void EmitSelectionMerge(EmitContext& ctx, std::string merge_label);
+void EmitBranch(EmitContext& ctx, std::string_view label);
+void EmitBranchConditional(EmitContext& ctx, std::string_view condition,
+                           std::string_view true_label, std::string_view false_label);
+void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label);
+void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label);
 void EmitReturn(EmitContext& ctx);
 void EmitJoin(EmitContext& ctx);
 void
EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); @@ -63,16 +63,17 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string value, std::string vertex); -void EmitGetAttributeIndexed(EmitContext& ctx, std::string offset, std::string vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, std::string offset, std::string value, - std::string vertex); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); void EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string value); -void EmitSetSampleMask(EmitContext& ctx, std::string value); -void EmitSetFragDepth(EmitContext& ctx, std::string value); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); @@ -87,8 +88,8 @@ void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); -void EmitLoadLocal(EmitContext& ctx, std::string word_offset); -void EmitWriteLocal(EmitContext& ctx, std::string word_offset, std::string value); +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx); void EmitUndefU8(EmitContext& ctx); void EmitUndefU16(EmitContext& ctx); @@ -98,16 +99,16 @@ void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx, std::string address); -void EmitLoadGlobal64(EmitContext& ctx, std::string address); -void EmitLoadGlobal128(EmitContext& ctx, std::string address); +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); void EmitWriteGlobalS16(EmitContext& 
ctx); -void EmitWriteGlobal32(EmitContext& ctx, std::string address, std::string value); -void EmitWriteGlobal64(EmitContext& ctx, std::string address, std::string value); -void EmitWriteGlobal128(EmitContext& ctx, std::string address, std::string value); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); @@ -116,69 +117,72 @@ void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Val void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); -void EmitLoadSharedU8(EmitContext& ctx, std::string offset); -void EmitLoadSharedS8(EmitContext& ctx, std::string offset); -void EmitLoadSharedU16(EmitContext& ctx, std::string offset); -void EmitLoadSharedS16(EmitContext& ctx, std::string offset); -void EmitLoadSharedU32(EmitContext& ctx, std::string offset); -void EmitLoadSharedU64(EmitContext& ctx, std::string offset); -void EmitLoadSharedU128(EmitContext& ctx, std::string offset); -void EmitWriteSharedU8(EmitContext& ctx, std::string offset, std::string value); -void EmitWriteSharedU16(EmitContext& ctx, std::string offset, std::string value); -void EmitWriteSharedU32(EmitContext& ctx, std::string offset, std::string value); -void EmitWriteSharedU64(EmitContext& ctx, std::string offset, std::string value); -void EmitWriteSharedU128(EmitContext& ctx, std::string offset, std::string value); -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string e1, std::string e2); -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, - std::string e4); -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string 
composite, std::string object, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeConstructF16x2(EmitContext& ctx, std::string e1, std::string e2); -void EmitCompositeConstructF16x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, - std::string e4); -void EmitCompositeExtractF16x2(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void 
EmitCompositeInsertF16x3(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string e1, std::string e2); -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3); -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3, - std::string e4); -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string composite, u32 index); -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); @@ -186,251 +190,274 @@ void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string composite, std::string object, +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitSelectU1(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectU8(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectU16(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectU32(EmitContext& ctx, std::string cond, std::string 
true_value, - std::string false_value); -void EmitSelectU64(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectF16(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectF32(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); -void EmitSelectF64(EmitContext& ctx, std::string cond, std::string true_value, - std::string false_value); +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); void EmitBitCastU16F16(EmitContext& ctx); -void EmitBitCastU32F32(EmitContext& ctx, std::string value); +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); void EmitBitCastU64F64(EmitContext& ctx); void EmitBitCastF16U16(EmitContext& ctx); -void EmitBitCastF32U32(EmitContext& ctx, std::string value); +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx); -void EmitPackUint2x32(EmitContext& ctx, std::string value); -void EmitUnpackUint2x32(EmitContext& ctx, std::string value); -void EmitPackFloat2x16(EmitContext& ctx, std::string value); -void EmitUnpackFloat2x16(EmitContext& ctx, std::string value); -void EmitPackHalf2x16(EmitContext& ctx, std::string value); -void EmitUnpackHalf2x16(EmitContext& ctx, std::string value); -void EmitPackDouble2x32(EmitContext& ctx, std::string value); -void EmitUnpackDouble2x32(EmitContext& ctx, std::string value); +void EmitPackUint2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void EmitGetInBoundsFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx, std::string value); -void EmitFPAbs32(EmitContext& ctx, std::string value); -void EmitFPAbs64(EmitContext& ctx, std::string value); -void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPAdd64(EmitContext& 
ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); -void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); -void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c); -void EmitFPMax32(EmitContext& ctx, std::string a, std::string b); -void EmitFPMax64(EmitContext& ctx, std::string a, std::string b); -void EmitFPMin32(EmitContext& ctx, std::string a, std::string b); -void EmitFPMin64(EmitContext& ctx, std::string a, std::string b); -void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitFPNeg16(EmitContext& ctx, std::string value); -void EmitFPNeg32(EmitContext& ctx, std::string value); -void EmitFPNeg64(EmitContext& ctx, std::string value); -void EmitFPSin(EmitContext& ctx, std::string value); -void EmitFPCos(EmitContext& ctx, std::string value); -void EmitFPExp2(EmitContext& ctx, std::string value); -void EmitFPLog2(EmitContext& ctx, std::string value); -void EmitFPRecip32(EmitContext& ctx, std::string value); -void EmitFPRecip64(EmitContext& ctx, std::string value); -void EmitFPRecipSqrt32(EmitContext& ctx, std::string value); -void EmitFPRecipSqrt64(EmitContext& ctx, std::string value); -void EmitFPSqrt(EmitContext& ctx, std::string value); -void EmitFPSaturate16(EmitContext& ctx, std::string value); -void EmitFPSaturate32(EmitContext& ctx, std::string value); -void EmitFPSaturate64(EmitContext& ctx, std::string value); -void EmitFPClamp16(EmitContext& ctx, std::string value, std::string min_value, - std::string max_value); -void EmitFPClamp32(EmitContext& ctx, std::string value, std::string min_value, - std::string max_value); -void EmitFPClamp64(EmitContext& ctx, std::string value, std::string min_value, - std::string max_value); -void EmitFPRoundEven16(EmitContext& ctx, std::string value); -void EmitFPRoundEven32(EmitContext& ctx, std::string value); -void EmitFPRoundEven64(EmitContext& ctx, std::string value); -void EmitFPFloor16(EmitContext& ctx, std::string value); -void EmitFPFloor32(EmitContext& ctx, std::string value); -void EmitFPFloor64(EmitContext& ctx, std::string value); -void EmitFPCeil16(EmitContext& ctx, std::string value); -void EmitFPCeil32(EmitContext& ctx, std::string value); -void EmitFPCeil64(EmitContext& ctx, std::string value); -void EmitFPTrunc16(EmitContext& ctx, std::string value); -void EmitFPTrunc32(EmitContext& ctx, std::string value); -void EmitFPTrunc64(EmitContext& ctx, std::string value); -void EmitFPOrdEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void 
EmitFPUnordNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThan16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThan32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThan64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThan16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThan32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThan64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs); -void EmitFPIsNan16(EmitContext& ctx, std::string value); -void EmitFPIsNan32(EmitContext& ctx, std::string value); -void EmitFPIsNan64(EmitContext& ctx, std::string value); -void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string base, std::string shift); -void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string base, - std::string shift); 
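A minimal sketch of why this migration prefers std::string_view for these pass-through parameters: a by-value std::string copies (and, beyond the small-string buffer, heap-allocates) at every call, while a std::string_view is a non-owning pointer/length pair. The helper names below are hypothetical and exist only to illustrate the cost difference; they are not part of the recompiler.

#include <cstdio>
#include <string>
#include <string_view>

// Hypothetical helpers: same payload, different parameter-passing cost.
static void EmitByValue(std::string value) {     // copies the argument
    std::printf("%s\n", value.c_str());
}
static void EmitByView(std::string_view value) { // views the caller's buffer
    std::printf("%.*s\n", static_cast<int>(value.size()), value.data());
}

int main() {
    const std::string expr = "shfl_in_bounds?shfl_result:value";
    EmitByValue(expr); // one string copy per call
    EmitByView(expr);  // pointer + length only, no allocation
    return 0;
}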
-void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string base, - std::string shift); -void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string base, std::string insert, - std::string offset, std::string count); -void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string base, std::string offset, - std::string count); -void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string base, std::string offset, - std::string count); -void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string value); -void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b); -void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string value, std::string min, - std::string max); -void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string value, std::string min, - std::string max); -void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string lhs, std::string rhs); -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicInc32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicDec32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicOr32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicXor32(EmitContext& ctx, std::string pointer_offset, std::string value); -void 
EmitSharedAtomicExchange32(EmitContext& ctx, std::string pointer_offset, std::string value); -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string pointer_offset, std::string value); +void EmitFPAbs16(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, std::string_view value); +void EmitFPSin(EmitContext& ctx, std::string_view value); +void EmitFPCos(EmitContext& ctx, std::string_view value); +void EmitFPExp2(EmitContext& ctx, std::string_view value); +void EmitFPLog2(EmitContext& ctx, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, std::string_view value); +void 
EmitFPTrunc32(EmitContext& ctx, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, 
std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, 
std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicExchange32(EmitContext& 
ctx, const IR::Value& binding, - const IR::Value& offset, std::string value); + const IR::Value& offset, std::string_view value); void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string value); + const IR::Value& offset, std::string_view value); void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string value); + std::string_view value); void EmitGlobalAtomicIAdd32(EmitContext& ctx); void EmitGlobalAtomicSMin32(EmitContext& ctx); void EmitGlobalAtomicUMin32(EmitContext& ctx); @@ -460,58 +487,58 @@ void EmitGlobalAtomicMinF16x2(EmitContext& ctx); void EmitGlobalAtomicMinF32x2(EmitContext& ctx); void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -void EmitLogicalOr(EmitContext& ctx, std::string a, std::string b); -void EmitLogicalAnd(EmitContext& ctx, std::string a, std::string b); -void EmitLogicalXor(EmitContext& ctx, std::string a, std::string b); -void EmitLogicalNot(EmitContext& ctx, std::string value); -void EmitConvertS16F16(EmitContext& ctx, std::string value); -void EmitConvertS16F32(EmitContext& ctx, std::string value); -void EmitConvertS16F64(EmitContext& ctx, std::string value); -void EmitConvertS32F16(EmitContext& ctx, std::string value); -void EmitConvertS32F32(EmitContext& ctx, std::string value); -void EmitConvertS32F64(EmitContext& ctx, std::string value); -void EmitConvertS64F16(EmitContext& 
ctx, std::string value); -void EmitConvertS64F32(EmitContext& ctx, std::string value); -void EmitConvertS64F64(EmitContext& ctx, std::string value); -void EmitConvertU16F16(EmitContext& ctx, std::string value); -void EmitConvertU16F32(EmitContext& ctx, std::string value); -void EmitConvertU16F64(EmitContext& ctx, std::string value); -void EmitConvertU32F16(EmitContext& ctx, std::string value); -void EmitConvertU32F32(EmitContext& ctx, std::string value); -void EmitConvertU32F64(EmitContext& ctx, std::string value); -void EmitConvertU64F16(EmitContext& ctx, std::string value); -void EmitConvertU64F32(EmitContext& ctx, std::string value); -void EmitConvertU64F64(EmitContext& ctx, std::string value); -void EmitConvertU64U32(EmitContext& ctx, std::string value); -void EmitConvertU32U64(EmitContext& ctx, std::string value); -void EmitConvertF16F32(EmitContext& ctx, std::string value); -void EmitConvertF32F16(EmitContext& ctx, std::string value); -void EmitConvertF32F64(EmitContext& ctx, std::string value); -void EmitConvertF64F32(EmitContext& ctx, std::string value); -void EmitConvertF16S8(EmitContext& ctx, std::string value); -void EmitConvertF16S16(EmitContext& ctx, std::string value); -void EmitConvertF16S32(EmitContext& ctx, std::string value); -void EmitConvertF16S64(EmitContext& ctx, std::string value); -void EmitConvertF16U8(EmitContext& ctx, std::string value); -void EmitConvertF16U16(EmitContext& ctx, std::string value); -void EmitConvertF16U32(EmitContext& ctx, std::string value); -void EmitConvertF16U64(EmitContext& ctx, std::string value); -void EmitConvertF32S8(EmitContext& ctx, std::string value); -void EmitConvertF32S16(EmitContext& ctx, std::string value); -void EmitConvertF32S32(EmitContext& ctx, std::string value); -void EmitConvertF32S64(EmitContext& ctx, std::string value); -void EmitConvertF32U8(EmitContext& ctx, std::string value); -void EmitConvertF32U16(EmitContext& ctx, std::string value); -void EmitConvertF32U32(EmitContext& ctx, std::string value); -void EmitConvertF32U64(EmitContext& ctx, std::string value); -void EmitConvertF64S8(EmitContext& ctx, std::string value); -void EmitConvertF64S16(EmitContext& ctx, std::string value); -void EmitConvertF64S32(EmitContext& ctx, std::string value); -void EmitConvertF64S64(EmitContext& ctx, std::string value); -void EmitConvertF64U8(EmitContext& ctx, std::string value); -void EmitConvertF64U16(EmitContext& ctx, std::string value); -void EmitConvertF64U32(EmitContext& ctx, std::string value); -void EmitConvertF64U64(EmitContext& ctx, std::string value); +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, std::string_view value); +void 
EmitConvertU16F32(EmitContext& ctx, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, std::string_view value); void EmitBindlessImageSampleImplicitLod(EmitContext&); void EmitBindlessImageSampleExplicitLod(EmitContext&); void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); @@ -537,31 +564,36 @@ void EmitBoundImageGradient(EmitContext&); void EmitBoundImageRead(EmitContext&); void EmitBoundImageWrite(EmitContext&); void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string bias_lc, const IR::Value& offset); + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string lod_lc, const IR::Value& offset); + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string dref, std::string bias_lc, - const IR::Value& offset); + std::string_view coords, std::string_view 
dref, + std::string_view bias_lc, const IR::Value& offset); void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string dref, std::string lod_lc, - const IR::Value& offset); -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, - const IR::Value& offset, const IR::Value& offset2); + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, const IR::Value& offset, const IR::Value& offset2, - std::string dref); -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, - std::string offset, std::string lod, std::string ms); + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string lod); + std::string_view lod); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords); -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, - std::string derivates, std::string offset, std::string lod_clamp); -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords, - std::string color); + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view color); void EmitBindlessImageAtomicIAdd32(EmitContext&); void EmitBindlessImageAtomicSMin32(EmitContext&); void EmitBindlessImageAtomicUMin32(EmitContext&); @@ -585,49 +617,53 @@ void EmitBoundImageAtomicOr32(EmitContext&); void EmitBoundImageAtomicXor32(EmitContext&); void EmitBoundImageAtomicExchange32(EmitContext&); void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, 
std::string_view value); void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value); + std::string_view coords, std::string_view value); void EmitLaneId(EmitContext& ctx); -void EmitVoteAll(EmitContext& ctx, std::string pred); -void EmitVoteAny(EmitContext& ctx, std::string pred); -void EmitVoteEqual(EmitContext& ctx, std::string pred); -void EmitSubgroupBallot(EmitContext& ctx, std::string pred); +void EmitVoteAll(EmitContext& ctx, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); void EmitSubgroupEqMask(EmitContext& ctx); void EmitSubgroupLtMask(EmitContext& ctx); void EmitSubgroupLeMask(EmitContext& ctx); void EmitSubgroupGtMask(EmitContext& ctx); void EmitSubgroupGeMask(EmitContext& ctx); -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask); -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask); -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask); -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, std::string op_a, std::string op_b, std::string swizzle); -void EmitDPdxFine(EmitContext& ctx, std::string op_a); -void EmitDPdyFine(EmitContext& ctx, std::string op_a); -void EmitDPdxCoarse(EmitContext& ctx, std::string op_a); -void EmitDPdyCoarse(EmitContext& ctx, std::string op_a); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void 
EmitDPdxFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 0dadf1d93..a22313141 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -12,221 +12,233 @@ namespace Shader::Backend::GLSL { void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { ctx.AddU32("{}={}+{};", inst, a, b); } void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string value) { + [[maybe_unused]] std::string_view value) { ctx.AddU32("{}=-{};", inst, value); } void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string value) { + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLSL Instruction"); } void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string value) { + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLSL Instruction"); } void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string value) { + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLSL Instruction"); } void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void 
EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, - [[maybe_unused]] std::string shift) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view shift) { throw NotImplementedException("GLSL Instruction"); } void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) { + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { throw NotImplementedException("GLSL Instruction"); } void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, [[maybe_unused]] std::string insert, - [[maybe_unused]] std::string offset, std::string count) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view insert, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { throw NotImplementedException("GLSL Instruction"); } void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, - std::string count) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { throw NotImplementedException("GLSL Instruction"); } void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string base, [[maybe_unused]] std::string offset, - std::string count) { + [[maybe_unused]] std::string_view base, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view count) { throw NotImplementedException("GLSL Instruction"); } void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string value) { + [[maybe_unused]] std::string_view value) { throw NotImplementedException("GLSL Instruction"); } 
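The definitions in this file follow a single stub pattern: keep the final std::string_view signature so the opcode dispatch table links, mark every parameter [[maybe_unused]] to silence -Wunused-parameter, and throw until the instruction is actually lowered. A self-contained sketch of that pattern, assuming hypothetical stand-ins for the project's NotImplementedException and EmitContext types:

#include <stdexcept>
#include <string_view>

// Hypothetical stand-ins for the recompiler's exception and context types.
struct NotImplemented : std::runtime_error {
    using std::runtime_error::runtime_error;
};
struct Ctx {};

// The stub keeps the full final signature; [[maybe_unused]] suppresses
// warnings for arguments the body does not read yet.
void EmitExampleOp([[maybe_unused]] Ctx& ctx, [[maybe_unused]] std::string_view value) {
    throw NotImplemented("GLSL Instruction");
}

int main() {
    Ctx ctx;
    try {
        EmitExampleOp(ctx, "value");
    } catch (const NotImplemented&) {
        // expected until the opcode is implemented
    }
    return 0;
}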
 void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                    [[maybe_unused]] std::string value) {
+                    [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                      [[maybe_unused]] std::string value) {
+                      [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                    [[maybe_unused]] std::string value) {
+                    [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                    [[maybe_unused]] std::string value) {
+                    [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) {
+                [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) {
+                [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) {
+                [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                [[maybe_unused]] std::string a, [[maybe_unused]] std::string b) {
+                [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                  [[maybe_unused]] std::string value, [[maybe_unused]] std::string min,
-                  std::string max) {
+                  [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min,
+                  [[maybe_unused]] std::string_view max) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                  [[maybe_unused]] std::string value, [[maybe_unused]] std::string min,
-                  std::string max) {
+                  [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min,
+                  [[maybe_unused]] std::string_view max) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                   [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                   [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                   [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                   [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                        [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                        [[maybe_unused]] std::string_view lhs,
+                        [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                        [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                        [[maybe_unused]] std::string_view lhs,
+                        [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                      [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                      [[maybe_unused]] std::string_view lhs,
+                      [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                      [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                      [[maybe_unused]] std::string_view lhs,
+                      [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                   [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                   [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                           [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                           [[maybe_unused]] std::string_view lhs,
+                           [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                           [[maybe_unused]] std::string lhs, [[maybe_unused]] std::string rhs) {
+                           [[maybe_unused]] std::string_view lhs,
+                           [[maybe_unused]] std::string_view rhs) {
     throw NotImplementedException("GLSL Instruction");
 }

 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index 8cdb9abfa..2344fd2a9 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -51,49 +51,49 @@ void EmitLoadStorage128([[maybe_unused]] EmitContext& ctx,
 void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset,
-                        [[maybe_unused]] std::string value) {
+                        [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset,
-                        [[maybe_unused]] std::string value) {
+                        [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& binding,
                          [[maybe_unused]] const IR::Value& offset,
-                         [[maybe_unused]] std::string value) {
+                         [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& binding,
                          [[maybe_unused]] const IR::Value& offset,
-                         [[maybe_unused]] std::string value) {
+                         [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitWriteStorage32([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset,
-                        [[maybe_unused]] std::string value) {
+                        [[maybe_unused]] std::string_view value) {
     ctx.Add("buff{}[{}]={};", binding.U32(), offset.U32(), value);
 }

 void EmitWriteStorage64([[maybe_unused]] EmitContext& ctx,
                         [[maybe_unused]] const IR::Value& binding,
                         [[maybe_unused]] const IR::Value& offset,
-                        [[maybe_unused]] std::string value) {
+                        [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
                          [[maybe_unused]] const IR::Value& binding,
                          [[maybe_unused]] const IR::Value& offset,
-                         [[maybe_unused]] std::string value) {
+                         [[maybe_unused]] std::string_view value) {
     throw NotImplementedException("GLSL Instruction");
 }

 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
index 0775d5c84..ff2b30eee 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp
@@ -39,20 +39,21 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value)
     NotImplemented();
 }

-void EmitBranch(EmitContext& ctx, std::string label) {
+void EmitBranch(EmitContext& ctx, std::string_view label) {
     NotImplemented();
 }

-void EmitBranchConditional(EmitContext& ctx, std::string condition, std::string true_label,
-                           std::string false_label) {
+void EmitBranchConditional(EmitContext& ctx, std::string_view condition,
+                           std::string_view true_label, std::string_view false_label) {
     NotImplemented();
 }

-void EmitLoopMerge(EmitContext& ctx, std::string merge_label, std::string continue_label) {
+void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label,
+                   std::string_view continue_label) {
     NotImplemented();
 }

-void EmitSelectionMerge(EmitContext& ctx, std::string merge_label) {
+void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) {
     NotImplemented();
 }
@@ -68,7 +69,7 @@ void EmitUnreachable(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string continue_label) {
+void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) {
     NotImplemented();
 }
@@ -132,20 +133,21 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string vertex) {
+void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) {
     NotImplemented();
 }

-void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string value, std::string vertex) {
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
+                      std::string_view vertex) {
     NotImplemented();
 }

-void EmitGetAttributeIndexed(EmitContext& ctx, std::string offset, std::string vertex) {
+void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) {
     NotImplemented();
 }

-void EmitSetAttributeIndexed(EmitContext& ctx, std::string offset, std::string value,
-                             std::string vertex) {
+void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value,
+                             std::string_view vertex) {
     NotImplemented();
 }
@@ -153,19 +155,19 @@ void EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
     NotImplemented();
 }

-void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string value) {
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) {
     NotImplemented();
 }

-void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string value) {
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) {
     NotImplemented();
 }

-void EmitSetSampleMask(EmitContext& ctx, std::string value) {
+void EmitSetSampleMask(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitSetFragDepth(EmitContext& ctx, std::string value) {
+void EmitSetFragDepth(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }
@@ -225,11 +227,11 @@ void EmitYDirection(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitLoadLocal(EmitContext& ctx, std::string word_offset) {
+void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) {
     NotImplemented();
 }

-void EmitWriteLocal(EmitContext& ctx, std::string word_offset, std::string value) {
+void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) {
     NotImplemented();
 }
@@ -269,15 +271,15 @@ void EmitLoadGlobalS16(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitLoadGlobal32(EmitContext& ctx, std::string address) {
+void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) {
     NotImplemented();
 }

-void EmitLoadGlobal64(EmitContext& ctx, std::string address) {
+void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) {
     NotImplemented();
 }

-void EmitLoadGlobal128(EmitContext& ctx, std::string address) {
+void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) {
     NotImplemented();
 }
@@ -297,182 +299,185 @@ void EmitWriteGlobalS16(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitWriteGlobal32(EmitContext& ctx, std::string address, std::string value) {
+void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteGlobal64(EmitContext& ctx, std::string address, std::string value) {
+void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteGlobal128(EmitContext& ctx, std::string address, std::string value) {
+void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
     NotImplemented();
 }

-void EmitLoadSharedU8(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedS8(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedU16(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedS16(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedU32(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedU64(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitLoadSharedU128(EmitContext& ctx, std::string offset) {
+void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) {
     NotImplemented();
 }

-void EmitWriteSharedU8(EmitContext& ctx, std::string offset, std::string value) {
+void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteSharedU16(EmitContext& ctx, std::string offset, std::string value) {
+void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteSharedU32(EmitContext& ctx, std::string offset, std::string value) {
+void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteSharedU64(EmitContext& ctx, std::string offset, std::string value) {
+void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) {
     NotImplemented();
 }

-void EmitWriteSharedU128(EmitContext& ctx, std::string offset, std::string value) {
+void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) {
     NotImplemented();
 }

-void EmitCompositeConstructU32x2(EmitContext& ctx, std::string e1, std::string e2) {
+void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) {
     NotImplemented();
 }

-void EmitCompositeConstructU32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) {
+void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3) {
     NotImplemented();
 }

-void EmitCompositeConstructU32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3,
-                                 std::string e4) {
+void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3, std::string_view e4) {
     NotImplemented();
 }

-void EmitCompositeExtractU32x2(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractU32x3(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractU32x4(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertU32x2(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertU32x3(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertU32x4(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeConstructF16x2(EmitContext& ctx, std::string e1, std::string e2) {
+void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) {
     NotImplemented();
 }

-void EmitCompositeConstructF16x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) {
+void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3) {
     NotImplemented();
 }

-void EmitCompositeConstructF16x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3,
-                                 std::string e4) {
+void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3, std::string_view e4) {
     NotImplemented();
 }

-void EmitCompositeExtractF16x2(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractF16x3(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractF16x4(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF16x2(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF16x3(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF16x4(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeConstructF32x2(EmitContext& ctx, std::string e1, std::string e2) {
+void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) {
     NotImplemented();
 }

-void EmitCompositeConstructF32x3(EmitContext& ctx, std::string e1, std::string e2, std::string e3) {
+void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3) {
     NotImplemented();
 }

-void EmitCompositeConstructF32x4(EmitContext& ctx, std::string e1, std::string e2, std::string e3,
-                                 std::string e4) {
+void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2,
+                                 std::string_view e3, std::string_view e4) {
     NotImplemented();
 }

-void EmitCompositeExtractF32x2(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractF32x3(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeExtractF32x4(EmitContext& ctx, std::string composite, u32 index) {
+void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF32x2(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF32x3(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF32x4(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }
@@ -501,58 +506,58 @@ void EmitCompositeExtractF64x4(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitCompositeInsertF64x2(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF64x3(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitCompositeInsertF64x4(EmitContext& ctx, std::string composite, std::string object,
+void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object,
                               u32 index) {
     NotImplemented();
 }

-void EmitSelectU1(EmitContext& ctx, std::string cond, std::string true_value,
-                  std::string false_value) {
+void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                  std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectU8(EmitContext& ctx, std::string cond, std::string true_value,
-                  std::string false_value) {
+void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                  std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectU16(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectU32(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectU64(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectF16(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectF32(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }

-void EmitSelectF64(EmitContext& ctx, std::string cond, std::string true_value,
-                   std::string false_value) {
+void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value,
+                   std::string_view false_value) {
     NotImplemented();
 }
@@ -560,7 +565,7 @@ void EmitBitCastU16F16(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitBitCastU32F32(EmitContext& ctx, std::string value) {
+void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }
@@ -572,7 +577,7 @@ void EmitBitCastF16U16(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitBitCastF32U32(EmitContext& ctx, std::string value) {
+void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }
@@ -580,35 +585,35 @@ void EmitBitCastF64U64(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitPackUint2x32(EmitContext& ctx, std::string value) {
+void EmitPackUint2x32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitUnpackUint2x32(EmitContext& ctx, std::string value) {
+void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitPackFloat2x16(EmitContext& ctx, std::string value) {
+void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitUnpackFloat2x16(EmitContext& ctx, std::string value) {
+void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitPackHalf2x16(EmitContext& ctx, std::string value) {
+void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitUnpackHalf2x16(EmitContext& ctx, std::string value) {
+void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitPackDouble2x32(EmitContext& ctx, std::string value) {
+void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitUnpackDouble2x32(EmitContext& ctx, std::string value) {
+void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }
@@ -636,529 +641,544 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitFPAbs16(EmitContext& ctx, std::string value) {
+void EmitFPAbs16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPAbs32(EmitContext& ctx, std::string value) {
+void EmitFPAbs32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPAbs64(EmitContext& ctx, std::string value) {
+void EmitFPAbs64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) {
+void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+                 std::string_view c) {
     NotImplemented();
 }

-void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) {
+void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+                 std::string_view c) {
     NotImplemented();
 }

-void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b, std::string c) {
+void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b,
+                 std::string_view c) {
     NotImplemented();
 }

-void EmitFPMax32(EmitContext& ctx, std::string a, std::string b) {
+void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMax64(EmitContext& ctx, std::string a, std::string b) {
+void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMin32(EmitContext& ctx, std::string a, std::string b) {
+void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMin64(EmitContext& ctx, std::string a, std::string b) {
+void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string a, std::string b) {
+void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitFPNeg16(EmitContext& ctx, std::string value) {
+void EmitFPNeg16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPNeg32(EmitContext& ctx, std::string value) {
+void EmitFPNeg32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPNeg64(EmitContext& ctx, std::string value) {
+void EmitFPNeg64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPSin(EmitContext& ctx, std::string value) {
+void EmitFPSin(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPCos(EmitContext& ctx, std::string value) {
+void EmitFPCos(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPExp2(EmitContext& ctx, std::string value) {
+void EmitFPExp2(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPLog2(EmitContext& ctx, std::string value) {
+void EmitFPLog2(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRecip32(EmitContext& ctx, std::string value) {
+void EmitFPRecip32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRecip64(EmitContext& ctx, std::string value) {
+void EmitFPRecip64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRecipSqrt32(EmitContext& ctx, std::string value) {
+void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRecipSqrt64(EmitContext& ctx, std::string value) {
+void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPSqrt(EmitContext& ctx, std::string value) {
+void EmitFPSqrt(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPSaturate16(EmitContext& ctx, std::string value) {
+void EmitFPSaturate16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPSaturate32(EmitContext& ctx, std::string value) {
+void EmitFPSaturate32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPSaturate64(EmitContext& ctx, std::string value) {
+void EmitFPSaturate64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPClamp16(EmitContext& ctx, std::string value, std::string min_value,
-                   std::string max_value) {
+void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value,
+                   std::string_view max_value) {
     NotImplemented();
 }

-void EmitFPClamp32(EmitContext& ctx, std::string value, std::string min_value,
-                   std::string max_value) {
+void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value,
+                   std::string_view max_value) {
     NotImplemented();
 }

-void EmitFPClamp64(EmitContext& ctx, std::string value, std::string min_value,
-                   std::string max_value) {
+void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value,
+                   std::string_view max_value) {
     NotImplemented();
 }

-void EmitFPRoundEven16(EmitContext& ctx, std::string value) {
+void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRoundEven32(EmitContext& ctx, std::string value) {
+void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPRoundEven64(EmitContext& ctx, std::string value) {
+void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPFloor16(EmitContext& ctx, std::string value) {
+void EmitFPFloor16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPFloor32(EmitContext& ctx, std::string value) {
+void EmitFPFloor32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPFloor64(EmitContext& ctx, std::string value) {
+void EmitFPFloor64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPCeil16(EmitContext& ctx, std::string value) {
+void EmitFPCeil16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPCeil32(EmitContext& ctx, std::string value) {
+void EmitFPCeil32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPCeil64(EmitContext& ctx, std::string value) {
+void EmitFPCeil64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPTrunc16(EmitContext& ctx, std::string value) {
+void EmitFPTrunc16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPTrunc32(EmitContext& ctx, std::string value) {
+void EmitFPTrunc32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPTrunc64(EmitContext& ctx, std::string value) {
+void EmitFPTrunc64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPOrdEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordNotEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordNotEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordNotEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThan16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThan32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThan64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThan16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThan32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThan64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string lhs, std::string rhs) {
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) {
     NotImplemented();
 }

-void EmitFPIsNan16(EmitContext& ctx, std::string value) {
+void EmitFPIsNan16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPIsNan32(EmitContext& ctx, std::string value) {
+void EmitFPIsNan32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitFPIsNan64(EmitContext& ctx, std::string value) {
+void EmitFPIsNan64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset,
+                            std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicSMin32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset,
+                            std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicUMin32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset,
+                            std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicSMax32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset,
+                            std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicUMax32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset,
+                            std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicInc32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset,
+                           std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicDec32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset,
+                           std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicAnd32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset,
+                           std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicOr32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset,
+                          std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicXor32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset,
+                           std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicExchange32(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset,
+                                std::string_view value) {
     NotImplemented();
 }

-void EmitSharedAtomicExchange64(EmitContext& ctx, std::string pointer_offset, std::string value) {
+void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset,
+                                std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           std::string value) {
+                           std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding,
-                                 const IR::Value& offset, std::string value) {
+                                 const IR::Value& offset, std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                           std::string value) {
+                           std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                            std::string value) {
+                            std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding,
-                                 const IR::Value& offset, std::string value) {
+                                 const IR::Value& offset, std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                             std::string value) {
+                             std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }

 void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
-                               std::string value) {
+                               std::string_view value) {
     NotImplemented();
 }
@@ -1278,211 +1298,211 @@ void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) {
     NotImplemented();
 }

-void EmitLogicalOr(EmitContext& ctx, std::string a, std::string b) {
+void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitLogicalAnd(EmitContext& ctx, std::string a, std::string b) {
+void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitLogicalXor(EmitContext& ctx, std::string a, std::string b) {
+void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) {
     NotImplemented();
 }

-void EmitLogicalNot(EmitContext& ctx, std::string value) {
+void EmitLogicalNot(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS16F16(EmitContext& ctx, std::string value) {
+void EmitConvertS16F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS16F32(EmitContext& ctx, std::string value) {
+void EmitConvertS16F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS16F64(EmitContext& ctx, std::string value) {
+void EmitConvertS16F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS32F16(EmitContext& ctx, std::string value) {
+void EmitConvertS32F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS32F32(EmitContext& ctx, std::string value) {
+void EmitConvertS32F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS32F64(EmitContext& ctx, std::string value) {
+void EmitConvertS32F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS64F16(EmitContext& ctx, std::string value) {
+void EmitConvertS64F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS64F32(EmitContext& ctx, std::string value) {
+void EmitConvertS64F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertS64F64(EmitContext& ctx, std::string value) {
+void EmitConvertS64F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU16F16(EmitContext& ctx, std::string value) {
+void EmitConvertU16F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU16F32(EmitContext& ctx, std::string value) {
+void EmitConvertU16F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU16F64(EmitContext& ctx, std::string value) {
+void EmitConvertU16F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU32F16(EmitContext& ctx, std::string value) {
+void EmitConvertU32F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU32F32(EmitContext& ctx, std::string value) {
+void EmitConvertU32F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU32F64(EmitContext& ctx, std::string value) {
+void EmitConvertU32F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU64F16(EmitContext& ctx, std::string value) {
+void EmitConvertU64F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU64F32(EmitContext& ctx, std::string value) {
+void EmitConvertU64F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU64F64(EmitContext& ctx, std::string value) {
+void EmitConvertU64F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU64U32(EmitContext& ctx, std::string value) {
+void EmitConvertU64U32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertU32U64(EmitContext& ctx, std::string value) {
+void EmitConvertU32U64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16F32(EmitContext& ctx, std::string value) {
+void EmitConvertF16F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32F16(EmitContext& ctx, std::string value) {
+void EmitConvertF32F16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32F64(EmitContext& ctx, std::string value) {
+void EmitConvertF32F64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64F32(EmitContext& ctx, std::string value) {
+void EmitConvertF64F32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16S8(EmitContext& ctx, std::string value) {
+void EmitConvertF16S8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16S16(EmitContext& ctx, std::string value) {
+void EmitConvertF16S16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16S32(EmitContext& ctx, std::string value) {
+void EmitConvertF16S32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16S64(EmitContext& ctx, std::string value) {
+void EmitConvertF16S64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16U8(EmitContext& ctx, std::string value) {
+void EmitConvertF16U8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16U16(EmitContext& ctx, std::string value) {
+void EmitConvertF16U16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16U32(EmitContext& ctx, std::string value) {
+void EmitConvertF16U32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF16U64(EmitContext& ctx, std::string value) {
+void EmitConvertF16U64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32S8(EmitContext& ctx, std::string value) {
+void EmitConvertF32S8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32S16(EmitContext& ctx, std::string value) {
+void EmitConvertF32S16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32S32(EmitContext& ctx, std::string value) {
+void EmitConvertF32S32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32S64(EmitContext& ctx, std::string value) {
+void EmitConvertF32S64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32U8(EmitContext& ctx, std::string value) {
+void EmitConvertF32U8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32U16(EmitContext& ctx, std::string value) {
+void EmitConvertF32U16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32U32(EmitContext& ctx, std::string value) {
+void EmitConvertF32U32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF32U64(EmitContext& ctx, std::string value) {
+void EmitConvertF32U64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64S8(EmitContext& ctx, std::string value) {
+void EmitConvertF64S8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64S16(EmitContext& ctx, std::string value) {
+void EmitConvertF64S16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64S32(EmitContext& ctx, std::string value) {
+void EmitConvertF64S32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64S64(EmitContext& ctx, std::string value) {
+void EmitConvertF64S64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64U8(EmitContext& ctx, std::string value) {
+void EmitConvertF64U8(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64U16(EmitContext& ctx, std::string value) {
+void EmitConvertF64U16(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64U32(EmitContext& ctx, std::string value) {
+void EmitConvertF64U32(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }

-void EmitConvertF64U64(EmitContext& ctx, std::string value) {
+void EmitConvertF64U64(EmitContext& ctx, std::string_view value) {
     NotImplemented();
 }
@@ -1583,64 +1603,69 @@ void EmitBoundImageWrite(EmitContext&) {
 }

 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                std::string coords, std::string bias_lc, const IR::Value& offset) {
+                                std::string_view coords, std::string_view bias_lc,
+                                const IR::Value& offset) {
     NotImplemented();
 }

 void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                std::string coords, std::string lod_lc, const IR::Value& offset) {
+                                std::string_view coords, std::string_view lod_lc,
+                                const IR::Value& offset) {
     NotImplemented();
 }

 void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                    std::string coords, std::string dref, std::string bias_lc,
-                                    const IR::Value& offset) {
+                                    std::string_view coords, std::string_view dref,
+                                    std::string_view bias_lc, const IR::Value& offset) {
     NotImplemented();
 }

 void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                                    std::string coords, std::string dref, std::string lod_lc,
-                                    const IR::Value& offset) {
+                                    std::string_view coords, std::string_view dref,
+                                    std::string_view lod_lc, const IR::Value& offset) {
     NotImplemented();
 }

-void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords,
-                     const IR::Value& offset, const IR::Value& offset2) {
+void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+                     std::string_view coords, const IR::Value& offset, const IR::Value& offset2) {
     NotImplemented();
 }

 void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                         std::string coords, const IR::Value& offset, const IR::Value& offset2,
-                         std::string dref) {
+                         std::string_view coords, const IR::Value& offset, const IR::Value& offset2,
+                         std::string_view dref) {
     NotImplemented();
 }

-void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords,
-                    std::string offset, std::string lod, std::string ms) {
+void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+                    std::string_view coords, std::string_view offset, std::string_view lod,
+                    std::string_view ms) {
     NotImplemented();
 }

 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              std::string lod) {
+                              std::string_view lod) {
     NotImplemented();
 }

 void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                       std::string coords) {
+                       std::string_view coords) {
     NotImplemented();
 }

-void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords,
-                       std::string derivates, std::string offset, std::string lod_clamp) {
+void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+                       std::string_view coords, std::string_view derivates, std::string_view offset,
+                       std::string_view lod_clamp) {
     NotImplemented();
 }

-void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords) {
+void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+                   std::string_view coords) {
     NotImplemented();
 }

-void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string coords,
-                    std::string color) {
+void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
+                    std::string_view coords, std::string_view color) {
     NotImplemented();
 }
@@ -1733,57 +1758,57 @@ void EmitBoundImageAtomicExchange32(EmitContext&) {
 }

 void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           std::string coords, std::string value) {
+                           std::string_view coords, std::string_view value) {
     NotImplemented();
 }

 void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           std::string coords, std::string value) {
+                           std::string_view coords, std::string_view value) {
     NotImplemented();
 }

 void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                           std::string coords, std::string value) {
+                           std::string_view coords, std::string_view value) {
     NotImplemented();
 }

 void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string coords, std::string value) { + std::string_view coords, std::string_view value) { NotImplemented(); } @@ -1791,19 +1816,19 @@ void EmitLaneId(EmitContext& ctx) { NotImplemented(); } -void EmitVoteAll(EmitContext& ctx, std::string pred) { +void EmitVoteAll(EmitContext& ctx, std::string_view pred) { NotImplemented(); } -void EmitVoteAny(EmitContext& ctx, std::string pred) { +void EmitVoteAny(EmitContext& ctx, std::string_view pred) { NotImplemented(); } -void EmitVoteEqual(EmitContext& ctx, std::string pred) { +void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { NotImplemented(); } -void EmitSubgroupBallot(EmitContext& ctx, std::string pred) { +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { NotImplemented(); } @@ -1827,43 +1852,47 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask) { +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask) { +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask) { +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { NotImplemented(); } -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string value, std::string index, - std::string clamp, std::string segmentation_mask) { +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, 
std::string_view clamp, + std::string_view segmentation_mask) { NotImplemented(); } -void EmitFSwizzleAdd(EmitContext& ctx, std::string op_a, std::string op_b, std::string swizzle) { +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { NotImplemented(); } -void EmitDPdxFine(EmitContext& ctx, std::string op_a) { +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { NotImplemented(); } -void EmitDPdyFine(EmitContext& ctx, std::string op_a) { +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { NotImplemented(); } -void EmitDPdxCoarse(EmitContext& ctx, std::string op_a) { +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { NotImplemented(); } -void EmitDPdyCoarse(EmitContext& ctx, std::string op_a) { +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { NotImplemented(); } From 0f40b0e61ccc04216e0840e092dfe3051716b8b6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 20:56:46 -0400 Subject: [PATCH 567/770] glsl: Implement a few Integer instructions --- .../backend/glsl/emit_context.cpp | 7 + .../backend/glsl/emit_context.h | 43 +++- .../glsl/emit_glsl_bitwise_conversion.cpp | 13 + .../backend/glsl/emit_glsl_composite.cpp | 237 ++++++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 10 +- .../backend/glsl/emit_glsl_integer.cpp | 51 ++-- .../glsl/emit_glsl_not_implemented.cpp | 214 ---------------- .../backend/glsl/emit_glsl_select.cpp | 61 +++++ .../backend/glsl/reg_alloc.cpp | 19 +- .../backend/glsl/reg_alloc.h | 3 + 10 files changed, 398 insertions(+), 260 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 8eea6344f..b3a3e5647 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -12,6 +12,7 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin const Profile& profile_) : info{program.info}, profile{profile_} { std::string header = "#version 450\n"; + SetupExtensions(header); if (program.stage == Stage::Compute) { header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", program.workgroup_size[0], program.workgroup_size[1], @@ -23,6 +24,12 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin code += "void main(){\n"; } +void EmitContext::SetupExtensions(std::string& header) { + if (info.uses_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } +} + void EmitContext::DefineConstantBuffers() { if (info.constant_buffer_descriptors.empty()) { return; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 81b970c14..f8cf8fdbc 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -38,28 +38,46 @@ public: // code += '\n'; // } - template <typename... Args> - void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { - code += - fmt::format(format_str, reg_alloc.Define(inst, Type::U32), std::forward<Args>(args)...); + template <Type type, typename... Args> + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(format_str, reg_alloc.Define(inst, type), std::forward<Args>(args)...); // TODO: Remove this code += '\n'; } + template <typename... Args> + void AddU1(const char* format_str, IR::Inst& inst, Args&&... 
args) { + Add<Type::U1>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::U32>(format_str, inst, args...); + } + template <typename... Args> void AddS32(const char* format_str, IR::Inst& inst, Args&&... args) { - code += - fmt::format(format_str, reg_alloc.Define(inst, Type::S32), std::forward<Args>(args)...); - // TODO: Remove this - code += '\n'; + Add<Type::S32>(format_str, inst, args...); } template <typename... Args> void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { - code += - fmt::format(format_str, reg_alloc.Define(inst, Type::F32), std::forward<Args>(args)...); - // TODO: Remove this - code += '\n'; + Add<Type::F32>(format_str, inst, args...); + } + + template <typename... Args> + void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::U64>(format_str, inst, args...); + } + + template <typename... Args> + void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::U32x2>(format_str, inst, args...); + } + + template <typename... Args> + void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::F32x2>(format_str, inst, args...); } template <typename... Args> @@ -75,6 +93,7 @@ public: const Profile& profile; private: + void SetupExtensions(std::string& header); void DefineConstantBuffers(); void DefineStorageBuffers(); }; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index f40f9900c..73cb66674 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -25,4 +25,17 @@ static void Alias(IR::Inst& inst, const IR::Value& value) { void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); } + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + ctx.AddU1("{}={};", inst, ctx.reg_alloc.Consume(value)); +} + +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=packUint2x32({});", inst, value); +} + +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index e69de29bb..2a7d207a7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -0,0 +1,237 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
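+//
+// These handlers lower IR composite ops directly onto GLSL syntax: CompositeConstruct
+// becomes a constructor call and CompositeExtract becomes component indexing. As a
+// rough sketch, a call such as
+//     ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2);
+// should emit a GLSL statement of the form
+//     uvec2 <reg>=uvec2(<e1>,<e2>);
+// where <reg> is whatever name RegAlloc::Define chose for the instruction and the
+// declared type string comes from RegAlloc::GetType. The names above are
+// illustrative, not taken from this patch.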
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitCompositeConstructU32x2([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); +} + +void EmitCompositeConstructU32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractU32x2([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + ctx.AddU32("{}={}[{}];", inst, composite, index); +} + +void EmitCompositeExtractU32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + 
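+// The F16 composite handlers in this file are deliberate stubs: core GLSL 450 has no
+// 16-bit scalar or vector types, so emitting them would presumably require an
+// extension such as GL_AMD_gpu_shader_half_float or a packed-uint emulation path
+// (the extension choice is an assumption here, not something this patch decides).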
+void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view e1, + [[maybe_unused]] std::string_view e2, + [[maybe_unused]] std::string_view e3, + [[maybe_unused]] std::string_view e4) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF32x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF32x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); 
+} + +void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view composite, + [[maybe_unused]] std::string_view object, + [[maybe_unused]] u32 index) { + throw NotImplementedException("GLSL Instruction"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index ff0c9cd95..51dbeb2c1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -142,12 +142,14 @@ void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_v void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, std::string_view e3); void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, std::string_view e3, std::string_view e4); -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, @@ -218,8 +220,8 @@ void EmitBitCastU64F64(EmitContext& ctx); void EmitBitCastF16U16(EmitContext& ctx); void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); void EmitBitCastF64U64(EmitContext& ctx); -void EmitPackUint2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index a22313141..016bccd39 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -1,4 +1,3 @@ - // Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -48,7 +47,7 @@ void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=abs({});", inst, value); } void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -59,52 +58,52 @@ void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}<<{};", inst, base, shift); } void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU64("{}={}<<{};", inst, base, shift); } void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}>>{};", inst, base, shift); } void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU64("{}={}>>{};", inst, base, shift); } void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddS32("{}=int({})>>{};", inst, base, shift); } void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view shift) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); } void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}&{};", inst, a, b); } void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}|{};", inst, a, b); } void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}^{};", inst, a, b); } void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -141,7 +140,7 @@ void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 
[[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=~{};", inst, value); } void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -156,22 +155,22 @@ void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=min(int({}), int({}));", inst, a, b); } void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=min(uint({}), uint({}));", inst, a, b); } void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=max(int({}), int({}));", inst, a, b); } void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=max(uint({}), uint({}));", inst, a, b); } void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -188,57 +187,57 @@ void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); } void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); } void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}={}=={};", inst, lhs, rhs); } void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); } void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); } void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); } void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); } void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}={}!={};", inst, lhs, rhs); } void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); } void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index ff2b30eee..de350b154 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -27,10 +27,6 @@ void EmitVoid(EmitContext& ctx) { NotImplemented(); } -void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { - NotImplemented(); -} - -void 
EmitReference(EmitContext&) { NotImplemented(); } @@ -359,208 +355,6 @@ void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_ NotImplemented(); } -void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { - NotImplemented(); -} - -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { - NotImplemented(); -} - -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { - NotImplemented(); -} - -void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { - NotImplemented(); -} - -void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { - NotImplemented(); -} - -void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { - NotImplemented(); -} - -void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { - NotImplemented(); -} - -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3) { - NotImplemented(); -} - -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4) { - NotImplemented(); -} - -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x3(EmitContext& ctx, 
std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeConstructF64x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeConstructF64x3(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeConstructF64x4(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x3(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeExtractF64x4(EmitContext& ctx) { - NotImplemented(); -} - -void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - NotImplemented(); -} - -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value) { - NotImplemented(); -} - void EmitBitCastU16F16(EmitContext& ctx) { NotImplemented(); } @@ -585,14 +379,6 @@ void EmitBitCastF64U64(EmitContext& ctx) { NotImplemented(); } -void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index e69de29bb..86d38da98 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
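+//
+// Select lowers naturally onto the GLSL ternary operator. The stubs below start out
+// throwing, and later patches in this series fill the U32/F32 cases in with
+//     ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value);
+// i.e. a statement of the form "uint <reg>=<cond>?<true>:<false>;" (names
+// illustrative).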
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitSelectU1([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, + [[maybe_unused]] std::string_view true_value, + [[maybe_unused]] std::string_view false_value) { + throw NotImplementedException("GLSL Instruction"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index f4886dbfd..3cfa16fea 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -57,9 +57,10 @@ std::string RegAlloc::Consume(const IR::Value& value) { std::string RegAlloc::Consume(IR::Inst& inst) { const Id id{inst.Definition<Id>()}; inst.DestructiveRemoveUsage(); - if (!inst.HasUses()) { - Free(id); - } + // TODO: reuse variables of same type if possible + // if (!inst.HasUses()) { + // Free(id); + // } return Representation(inst.Definition<Id>()); } @@ -69,14 +70,24 @@ std::string RegAlloc::GetType(Type type, u32 index) { } register_defined[index] = true; switch (type) { + case Type::U1: + return "bool "; case Type::U32: return "uint "; case Type::S32: return "int "; case Type::F32: return "float "; - default: + case Type::U64: + return "uint64_t "; + case Type::U32x2: + return "uvec2 "; + case Type::F32x2: + return "vec2 "; + case Type::Void: return ""; + default: + throw NotImplementedException("Type {}", type); } } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h index 
9b98aab39..84ef7e822 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -16,11 +16,14 @@ class Value; namespace Shader::Backend::GLSL { enum class Type : u32 { + U1, U32, S32, F32, U64, F64, + U32x2, + F32x2, Void, }; From 266a3d60e3d8ee4b67a4a6b3e69d8632509b7a43 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 21:31:41 -0400 Subject: [PATCH 568/770] glsl: Implement BF* --- .../backend/glsl/emit_glsl_instructions.h | 4 ++-- .../backend/glsl/emit_glsl_integer.cpp | 10 +++++----- .../backend/glsl/emit_glsl_select.cpp | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 51dbeb2c1..5370af0c5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -204,8 +204,8 @@ void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true std::string_view false_value); void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 016bccd39..3f1b56a05 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -22,7 +22,7 @@ void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}-{};", inst, a, b); } void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -111,26 +111,26 @@ void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR:: [[maybe_unused]] std::string_view insert, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view count) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=bitfieldInsert({}, {}, int({}), int({}));", inst, base, insert, offset, count); } void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view count) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=bitfieldExtract(int({}), int({}), int({}));", inst, base, offset, count); } void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view base, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view count) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=bitfieldExtract({}, int({}), int({}));", inst, base, offset, 
count); } void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=bitfieldReverse({});", inst, value); } void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 86d38da98..4455b0f9f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,10 +28,11 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri throw NotImplementedException("GLSL Instruction"); } -void EmitSelectU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, +void EmitSelectU32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] std::string_view cond, [[maybe_unused]] std::string_view true_value, [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, From ef7bd53f189a9bd11e4eebdcc142f60c2be8a504 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 21 May 2021 21:37:13 -0400 Subject: [PATCH 569/770] glsl: Implement more Integer ops --- .../backend/glsl/emit_glsl_instructions.h | 4 +- .../backend/glsl/emit_glsl_integer.cpp | 174 +++++++----------- .../backend/glsl/emit_glsl_select.cpp | 13 +- 3 files changed, 72 insertions(+), 119 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 5370af0c5..39c0ba859 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -210,8 +210,8 @@ void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view tru std::string_view false_value); void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); void EmitBitCastU16F16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 3f1b56a05..083d81ccb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -10,136 +10,108 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { -void EmitIAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}={}+{};", inst, a, b); } -void EmitIAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { 
- throw NotImplementedException("GLSL Instruction"); +void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}+{};", inst, a, b); } -void EmitISub32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}={}-{};", inst, a, b); } -void EmitISub64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); +void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU64("{}={}-{};", inst, a, b); } -void EmitIMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); +void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU32("{}={}*{};", inst, a, b); } -void EmitINeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=-{};", inst, value); } -void EmitINeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=-{};", inst, value); } -void EmitIAbs32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=abs({});", inst, value); } -void EmitIAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=abs({});", inst, value); } -void EmitShiftLeftLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddU32("{}={}<<{};", inst, base, shift); } -void EmitShiftLeftLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddU64("{}={}<<{};", inst, base, shift); } -void EmitShiftRightLogical32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddU32("{}={}>>{};", inst, base, shift); } -void EmitShiftRightLogical64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightLogical64(EmitContext& ctx, 
IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddU64("{}={}>>{};", inst, base, shift); } -void EmitShiftRightArithmetic32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddS32("{}=int({})>>{};", inst, base, shift); } -void EmitShiftRightArithmetic64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view shift) { +void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view shift) { ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); } -void EmitBitwiseAnd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}={}&{};", inst, a, b); } -void EmitBitwiseOr32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}={}|{};", inst, a, b); } -void EmitBitwiseXor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}={}^{};", inst, a, b); } -void EmitBitFieldInsert([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view insert, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view insert, std::string_view offset, std::string_view count) { ctx.AddU32("{}=bitfieldInsert({}, {}, int({}), int({}));", inst, base, insert, offset, count); } -void EmitBitFieldSExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { ctx.AddU32("{}=bitfieldExtract(int({}), int({}), int({}));", inst, base, offset, count); } -void EmitBitFieldUExtract([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view base, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view count) { +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, + std::string_view offset, std::string_view count) { ctx.AddU32("{}=bitfieldExtract({}, int({}), int({}));", inst, base, offset, count); } -void EmitBitReverse32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=bitfieldReverse({});", inst, value); } -void EmitBitCount32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - 
[[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=bitCount({});", inst, value); } -void EmitBitwiseNot32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=~{};", inst, value); } @@ -153,91 +125,75 @@ void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst throw NotImplementedException("GLSL Instruction"); } -void EmitSMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}=min(int({}), int({}));", inst, a, b); } -void EmitUMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}=min(uint({}), uint({}));", inst, a, b); } -void EmitSMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}=max(int({}), int({}));", inst, a, b); } -void EmitUMax32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddU32("{}=max(uint({}), uint({}));", inst, a, b); } -void EmitSClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, - [[maybe_unused]] std::string_view max) { - throw NotImplementedException("GLSL Instruction"); +void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + ctx.AddU32("{}=clamp(int({}), int({}), int({}));", inst, value, min, max); } -void EmitUClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min, - [[maybe_unused]] std::string_view max) { - throw NotImplementedException("GLSL Instruction"); +void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, + std::string_view max) { + ctx.AddU32("{}=clamp(uint({}), uint({}), uint({}));", inst, value, min, max); } -void EmitSLessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); } -void EmitULessThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); } -void EmitIEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { ctx.AddU1("{}={}=={};", inst, lhs, rhs); } -void EmitSLessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); } -void EmitULessThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); } -void EmitSGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); } -void EmitUGreaterThan([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); } -void EmitINotEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] 
IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { +void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { ctx.AddU1("{}={}!={};", inst, lhs, rhs); } -void EmitSGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); } -void EmitUGreaterThanEqual([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { +void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 4455b0f9f..a6bf18fb1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,10 +28,8 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri throw NotImplementedException("GLSL Instruction"); } -void EmitSelectU32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] std::string_view cond, - [[maybe_unused]] std::string_view true_value, - [[maybe_unused]] std::string_view false_value) { +void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } @@ -47,10 +45,9 @@ void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri throw NotImplementedException("GLSL Instruction"); } -void EmitSelectF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, - [[maybe_unused]] std::string_view true_value, - [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); +void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, From 53667ddd4ebdaa98f9c40ef3aee8efbdb15a0a6f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 10 Jul 2021 17:57:35 -0300 Subject: [PATCH 570/770] glsl: Fixup build issues --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4387532ab..602cf025b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -437,7 +437,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const auto code{EmitGLSL(profile, program, binding)}; + const auto code{EmitGLSL(profile, runtime_info, program, binding)}; OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } From 
5e9095ef2203e7cddcaba84fa3b01cc0d940b634 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 22 May 2021 01:52:03 -0400 Subject: [PATCH 571/770] glsl: Add many FP32/64 instructions --- .../backend/glsl/emit_context.h | 10 + .../glsl/emit_glsl_bitwise_conversion.cpp | 49 ++ .../backend/glsl/emit_glsl_composite.cpp | 24 +- .../glsl/emit_glsl_context_get_set.cpp | 7 +- .../backend/glsl/emit_glsl_convert.cpp | 253 ++++++++ .../backend/glsl/emit_glsl_floating_point.cpp | 487 +++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 324 +++++----- .../backend/glsl/emit_glsl_memory.cpp | 15 +- .../glsl/emit_glsl_not_implemented.cpp | 586 ------------------ .../backend/glsl/emit_glsl_select.cpp | 14 +- .../backend/glsl/reg_alloc.cpp | 4 + .../backend/glsl/reg_alloc.h | 3 +- 12 files changed, 1011 insertions(+), 765 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index f8cf8fdbc..9472f71b9 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -65,11 +65,21 @@ public: Add(format_str, inst, args...); } + template <typename... Args> + void AddS64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::S64>(format_str, inst, args...); + } + template <typename... Args> void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { Add<Type::U64>(format_str, inst, args...); } + template <typename... Args> + void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::F64>(format_str, inst, args...); + } + template <typename... Args> void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { Add<Type::U32x2>(format_str, inst, args...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 73cb66674..a1e97b4cb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -30,6 +30,30 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) ctx.AddU1("{}={};", inst, ctx.reg_alloc.Consume(value)); } +void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=floatBitsToUint({});", inst, value); +} + +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); +} + +void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32("{}=uintBitsToFloat({});", inst, value); +} + +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); +} + void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU64("{}=packUint2x32({});", inst, value); } @@ -38,4 +62,29 @@ void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); }
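GLSL has no general-purpose bitcast, so the emitters above lean on the built-in reinterpretation functions: floatBitsToUint/uintBitsToFloat for the 32-bit casts (core GLSL), doubleBitsToUint64/uint64BitsToDouble for the 64-bit casts (these come from GL_ARB_gpu_shader_int64 and therefore assume that extension is present), and packUint2x32/unpackUint2x32 to move between uint64_t and uvec2. The semantics are plain bit copies, which a short C++20 sketch can state precisely (PackUint2x32 is a hypothetical reference function for illustration, not emulator code):

    #include <bit>
    #include <cstdint>

    // GLSL packUint2x32(uvec2(lo, hi)): component .x supplies the 32 least
    // significant bits of the resulting uint64_t.
    constexpr std::uint64_t PackUint2x32(std::uint32_t lo, std::uint32_t hi) {
        return (std::uint64_t{hi} << 32) | lo;
    }
    static_assert(PackUint2x32(0xdeadbeefu, 0x1u) == 0x1'deadbeefULL);

    // GLSL floatBitsToUint is a pure bit reinterpretation, like std::bit_cast.
    static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000u);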
+void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packHalf2x16({});", inst, value); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); +} + +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=packDouble2x32({});", inst, value); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 2a7d207a7..40b9ca08e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -10,9 +10,8 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { -void EmitCompositeConstructU32x2([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2) { +void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); } @@ -31,9 +30,8 @@ void EmitCompositeConstructU32x4([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLSL Instruction"); } -void EmitCompositeExtractU32x2([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { +void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { ctx.AddU32("{}={}[{}];", inst, composite, index); } @@ -130,10 +128,9 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLSL Instruction"); } -void EmitCompositeConstructF32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2) { + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); } void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, @@ -151,10 +148,9 @@ void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLSL Instruction"); } -void EmitCompositeExtractF32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}[{}];", inst, composite, index); } void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 2f4ecd6a1..d3d55562c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -37,9 +37,10 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, (u32_offset / 4) % 4); } -void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto u32_offset{offset.U32()}; + ctx.AddF32("{}=cbuf{}[{}][{}];", inst, binding.U32(), u32_offset / 16, (u32_offset / 4) % 4); }
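The EmitGetCbufF32 replacement just above converts an immediate byte offset into vec4-array indexing: 16 bytes per vec4 slot and 4 bytes per component. The offset.U32() call also means this path only handles immediate offsets; a dynamic offset could not be folded into the subscripts like this. A worked check of the arithmetic (standalone illustration, not emulator code):

    #include <cassert>

    int main() {
        const unsigned u32_offset = 20;     // byte offset into constant buffer 0
        assert(u32_offset / 16 == 1);       // vec4 slot index (16 bytes per vec4)
        assert((u32_offset / 4) % 4 == 1);  // component index (4 bytes per float)
        // For binding 0 the emitter would thus write e.g. "f3=cbuf0[1][1];",
        // reading component .y of the second vec4.
        return 0;
    }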
void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index e69de29bb..7ddc24c71 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -0,0 +1,253 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertS16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertS32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddS32("{}=int({});", inst, value); +} + +void EmitConvertS32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddS32("{}=int({});", inst, value); +} + +void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertS64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddS64("{}=int64_t({});", inst, value); +} + +void EmitConvertS64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddS64("{}=int64_t({});", inst, value); +} + +void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx,
[[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertU32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertU64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU64("{}=uint64_t({});", inst, value); +} + +void EmitConvertU32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU32("{}=uint({});", inst, value); +} + +void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw 
NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF32U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=float({});", inst, value); +} + +void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF64S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitConvertF64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +void EmitConvertF64U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=double({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index e69de29bb..bea7600af 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -0,0 +1,487 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPAbs32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=abs({});", inst, value); +} + +void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLSL"); +} + +void EmitFPAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF32("{}={}+{};", inst, a, b); +} + +void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF64("{}={}+{};", inst, a, b); +} + +void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + throw NotImplementedException("GLSL"); +} + +void EmitFPFma32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); +} + +void EmitFPFma64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, + [[maybe_unused]] std::string_view c) { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); +} + +void EmitFPMax32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF32("{}=max({},{});", inst, a, b); +} + +void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF64("{}=max({},{});", inst, a, b); +} + +void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF32("{}=min({},{});", inst, a, b); +} + +void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF64("{}=min({},{});", inst, a, b); +} + +void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + throw NotImplementedException("GLSL"); +} + +void EmitFPMul32([[maybe_unused]] EmitContext&
ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF32("{}={}*{};", inst, a, b); +} + +void EmitFPMul64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { + ctx.AddF64("{}={}*{};", inst, a, b); +} + +void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPNeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=-{};", inst, value); +} + +void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=-{};", inst, value); +} + +void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=sin({});", inst, value); +} + +void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=cos({});", inst, value); +} + +void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=exp2({});", inst, value); +} + +void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=log2({});", inst, value); +} + +void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=1/{};", inst, value); +} + +void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=1/{};", inst, value); +} + +void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=sqrt({});", inst, value); +} + +void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPSaturate32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=clamp({},0.0f,1.0f);", inst, value); +} + +void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=clamp({},0.0,1.0);", inst, value); +} + +void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + throw NotImplementedException("GLSL"); +}
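Saturate has no dedicated GLSL built-in, so EmitFPSaturate32/64 above lower it to clamp(value, 0.0, 1.0), and the 16-bit variant stays unimplemented like the other f16 paths in this backend (the original chunk emitted an F64 sqrt here, which looks like a copy-paste slip). Be aware that GLSL leaves min/max/clamp undefined for NaN operands, so this lowering is pragmatic rather than IEEE-exact. A CPU-side reference of the intended behaviour, assuming non-NaN inputs (illustrative only, not emulator code):

    #include <algorithm>

    constexpr float Saturate(float value) {
        return std::clamp(value, 0.0f, 1.0f);
    }
    static_assert(Saturate(1.5f) == 1.0f);
    static_assert(Saturate(-0.25f) == 0.0f);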
+void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + ctx.AddF32("{}=clamp({},{},{});", inst, value, min_value, max_value); +} + +void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view min_value, + [[maybe_unused]] std::string_view max_value) { + ctx.AddF64("{}=clamp({},{},{});", inst, value, min_value, max_value); +} + +void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=roundEven({});", inst, value); +} + +void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=roundEven({});", inst, value); +} + +void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=floor({});", inst, value); +} + +void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=floor({});", inst, value); +} + +void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=ceil({});", inst, value); +} + +void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=ceil({});", inst, value); +} + +void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL"); +} + +void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF32("{}=trunc({});", inst, value); +} + +void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddF64("{}=trunc({});", inst, value); +} + +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]]
IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void 
EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] 
IR::Inst& inst, + [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL"); +} + +void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +void EmitFPIsNan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view value) { + ctx.AddU1("{}=isnan({});", inst, value); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 39c0ba859..49993dc5c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -61,7 +61,8 @@ void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, @@ -172,12 +173,14 @@ void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std: u32 index); void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2); void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, std::string_view e3); void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, std::string_view e3, std::string_view e4); -void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, @@ -206,37 +209,37 @@ void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view tru std::string_view false_value); void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectF16(EmitContext& ctx, std::string_view cond, 
std::string_view true_value, std::string_view false_value); void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value); -void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); -void EmitBitCastU16F16(EmitContext& ctx); -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); -void EmitBitCastF64U64(EmitContext& ctx); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); +void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); void EmitGetSparseFromOp(EmitContext& ctx); void EmitGetInBoundsFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx, std::string_view value); -void EmitFPAbs32(EmitContext& ctx, std::string_view value); -void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); @@ -246,85 +249,118 @@ void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri std::string_view c); void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c); -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); -void 
EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); -void EmitFPNeg16(EmitContext& ctx, std::string_view value); -void EmitFPNeg32(EmitContext& ctx, std::string_view value); -void EmitFPNeg64(EmitContext& ctx, std::string_view value); -void EmitFPSin(EmitContext& ctx, std::string_view value); -void EmitFPCos(EmitContext& ctx, std::string_view value); -void EmitFPExp2(EmitContext& ctx, std::string_view value); -void EmitFPLog2(EmitContext& ctx, std::string_view value); -void EmitFPRecip32(EmitContext& ctx, std::string_view value); -void EmitFPRecip64(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); -void EmitFPSqrt(EmitContext& ctx, std::string_view value); -void EmitFPSaturate16(EmitContext& ctx, std::string_view value); -void EmitFPSaturate32(EmitContext& ctx, std::string_view value); -void EmitFPSaturate64(EmitContext& ctx, std::string_view value); -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value); -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); -void EmitFPFloor16(EmitContext& ctx, std::string_view value); -void EmitFPFloor32(EmitContext& ctx, std::string_view value); -void EmitFPFloor64(EmitContext& ctx, std::string_view value); -void EmitFPCeil16(EmitContext& ctx, std::string_view value); -void EmitFPCeil32(EmitContext& ctx, std::string_view value); -void EmitFPCeil64(EmitContext& ctx, std::string_view value); -void EmitFPTrunc16(EmitContext& ctx, std::string_view value); -void EmitFPTrunc32(EmitContext& ctx, std::string_view value); -void EmitFPTrunc64(EmitContext& ctx, std::string_view value); -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, 
std::string_view rhs); -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); -void EmitFPIsNan16(EmitContext& ctx, std::string_view value); -void EmitFPIsNan32(EmitContext& ctx, std::string_view value); -void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& 
ctx, IR::Inst& inst, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void 
EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); @@ -493,54 +529,54 @@ void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); void EmitLogicalNot(EmitContext& ctx, std::string_view value); -void EmitConvertS16F16(EmitContext& ctx, std::string_view value); -void EmitConvertS16F32(EmitContext& ctx, std::string_view value); -void EmitConvertS16F64(EmitContext& ctx, std::string_view value); -void EmitConvertS32F16(EmitContext& ctx, std::string_view value); -void EmitConvertS32F32(EmitContext& ctx, std::string_view value); -void EmitConvertS32F64(EmitContext& ctx, std::string_view value); -void EmitConvertS64F16(EmitContext& ctx, std::string_view value); -void EmitConvertS64F32(EmitContext& ctx, std::string_view value); -void EmitConvertS64F64(EmitContext& ctx, 
std::string_view value); -void EmitConvertU16F16(EmitContext& ctx, std::string_view value); -void EmitConvertU16F32(EmitContext& ctx, std::string_view value); -void EmitConvertU16F64(EmitContext& ctx, std::string_view value); -void EmitConvertU32F16(EmitContext& ctx, std::string_view value); -void EmitConvertU32F32(EmitContext& ctx, std::string_view value); -void EmitConvertU32F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64F16(EmitContext& ctx, std::string_view value); -void EmitConvertU64F32(EmitContext& ctx, std::string_view value); -void EmitConvertU64F64(EmitContext& ctx, std::string_view value); -void EmitConvertU64U32(EmitContext& ctx, std::string_view value); -void EmitConvertU32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF16F32(EmitContext& ctx, std::string_view value); -void EmitConvertF32F16(EmitContext& ctx, std::string_view value); -void EmitConvertF32F64(EmitContext& ctx, std::string_view value); -void EmitConvertF64F32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S8(EmitContext& ctx, std::string_view value); -void EmitConvertF16S16(EmitContext& ctx, std::string_view value); -void EmitConvertF16S32(EmitContext& ctx, std::string_view value); -void EmitConvertF16S64(EmitContext& ctx, std::string_view value); -void EmitConvertF16U8(EmitContext& ctx, std::string_view value); -void EmitConvertF16U16(EmitContext& ctx, std::string_view value); -void EmitConvertF16U32(EmitContext& ctx, std::string_view value); -void EmitConvertF16U64(EmitContext& ctx, std::string_view value); -void EmitConvertF32S8(EmitContext& ctx, std::string_view value); -void EmitConvertF32S16(EmitContext& ctx, std::string_view value); -void EmitConvertF32S32(EmitContext& ctx, std::string_view value); -void EmitConvertF32S64(EmitContext& ctx, std::string_view value); -void EmitConvertF32U8(EmitContext& ctx, std::string_view value); -void EmitConvertF32U16(EmitContext& ctx, std::string_view value); -void EmitConvertF32U32(EmitContext& ctx, std::string_view value); -void EmitConvertF32U64(EmitContext& ctx, std::string_view value); -void EmitConvertF64S8(EmitContext& ctx, std::string_view value); -void EmitConvertF64S16(EmitContext& ctx, std::string_view value); -void EmitConvertF64S32(EmitContext& ctx, std::string_view value); -void EmitConvertF64S64(EmitContext& ctx, std::string_view value); -void EmitConvertF64U8(EmitContext& ctx, std::string_view value); -void EmitConvertF64U16(EmitContext& ctx, std::string_view value); -void EmitConvertF64U32(EmitContext& ctx, std::string_view value); -void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void 
EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitBindlessImageSampleImplicitLod(EmitContext&); void EmitBindlessImageSampleExplicitLod(EmitContext&); void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 2344fd2a9..7c8c23050 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -76,18 +76,15 @@ void 
EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLSL Instruction"); } -void EmitWriteStorage32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { ctx.Add("buff{}[{}]={};", binding.U32(), offset.U32(), value); } -void EmitWriteStorage64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + ctx.Add("buff{}[{}]={}.x;", binding.U32(), offset.U32(), value); + ctx.Add("buff{}[{}]={}.y;", binding.U32(), offset.U32() + 1, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index de350b154..23f8730ca 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -355,54 +355,6 @@ void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_ NotImplemented(); } -void EmitBitCastU16F16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitCastU64F64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastF16U16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitBitCastF64U64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } @@ -427,352 +379,6 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitFPAbs16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAbs32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAbs64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c) { - NotImplemented(); -} - -void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c) { -
NotImplemented(); -} - -void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, - std::string_view c) { - NotImplemented(); -} - -void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitFPNeg16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPNeg32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPNeg64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSin(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCos(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPExp2(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPLog2(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecip32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecip64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSqrt(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, - std::string_view max_value) { - NotImplemented(); -} - -void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPFloor64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCeil16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCeil32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPCeil64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc16(EmitContext& ctx, 
std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual32(EmitContext& ctx, 
std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { - NotImplemented(); -} - -void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, std::string_view value) { NotImplemented(); @@ -1100,198 +706,6 @@ void EmitLogicalNot(EmitContext& ctx, std::string_view value) { NotImplemented(); } -void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { - 
NotImplemented(); -} - -void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index a6bf18fb1..a049e3dc9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -33,10 +33,9 @@ void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } -void EmitSelectU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, - [[maybe_unused]] std::string_view true_value, - [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); +void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, @@ -50,10 +49,9 @@ void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, ctx.AddF32("{}={}?{}:{};", inst, cond, 
true_value, false_value); } -void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, - [[maybe_unused]] std::string_view true_value, - [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); +void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 3cfa16fea..039236689 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -78,8 +78,12 @@ std::string RegAlloc::GetType(Type type, u32 index) { return "int "; case Type::F32: return "float "; + case Type::S64: + return "int64_t "; case Type::U64: return "uint64_t "; + case Type::F64: + return "double "; case Type::U32x2: return "uvec2 "; case Type::F32x2: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 84ef7e822..63c940d3a 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -17,9 +17,10 @@ class Value; namespace Shader::Backend::GLSL { enum class Type : u32 { U1, - U32, S32, + U32, F32, + S64, U64, F64, U32x2, From 65c6f73e436ba3116030277a7a8bcb563f9554e2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 22 May 2021 02:32:57 -0400 Subject: [PATCH 572/770] glsl: More FP instructions/fixes --- .../backend/glsl/emit_glsl_floating_point.cpp | 10 +++---- .../backend/glsl/emit_glsl_instructions.h | 8 ++--- .../backend/glsl/emit_glsl_logical.cpp | 29 +++++++++++++++++++ .../glsl/emit_glsl_not_implemented.cpp | 16 ---------- .../backend/glsl/reg_alloc.cpp | 6 ++-- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index bea7600af..3f3a83b20 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -33,12 +33,12 @@ void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i void EmitFPAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.AddF32("{}={}+{};", inst, a, b); + ctx.AddF32("{}=float({})+float({});", inst, a, b); } void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - ctx.AddF64("{}={}+{};", inst, a, b); + ctx.AddF64("{}=double({})+double({});", inst, a, b); } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -180,14 +180,14 @@ void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - ctx.AddF32("{}=clamp({},{},{});", inst, value, min_value, max_value); + ctx.AddF32("{}=clamp({},float({}),float({}));", inst, value, min_value, max_value); } void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value, 
[[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - ctx.AddF64("{}=clamp({},{},{});", inst, value, min_value, max_value); + ctx.AddF64("{}=clamp({},double({}),double({}));", inst, value, min_value, max_value); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -259,7 +259,7 @@ void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitFPOrdEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + ctx.AddU1("{}={}=={};", inst, lhs, rhs); } void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 49993dc5c..efa515a3c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -525,10 +525,10 @@ void EmitGlobalAtomicMinF16x2(EmitContext& ctx); void EmitGlobalAtomicMinF32x2(EmitContext& ctx); void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); -void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp index e69de29bb..e4781c03c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -0,0 +1,29 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
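+// Note: unlike C and C++, GLSL defines a dedicated logical XOR operator (^^) for
+// booleans, so EmitLogicalXor below can emit it directly rather than emulating
+// the operation with != on bool operands.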
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}||{};", inst, a, b); +} + +void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}&&{};", inst, a, b); +} + +void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + ctx.AddU1("{}={}^^{};", inst, a, b); +} + +void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU1("{}=!{};", inst, value); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 23f8730ca..cb6562ebf 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -690,22 +690,6 @@ void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { NotImplemented(); } -void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { - NotImplemented(); -} - -void EmitLogicalNot(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 039236689..c370aabb5 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -30,11 +30,11 @@ std::string MakeImm(const IR::Value& value) { case IR::Type::U1: return fmt::format("{}", value.U1() ?
"true" : "false"); case IR::Type::U32: - return fmt::format("{}", value.U32()); + return fmt::format("{}u", value.U32()); case IR::Type::F32: - return fmt::format("{}", value.F32()); + return fmt::format("{}f", value.F32()); case IR::Type::U64: - return fmt::format("{}", value.U64()); + return fmt::format("{}ul", value.U64()); case IR::Type::F64: return fmt::format("{}", value.F64()); default: From 3064bde4155b865d61258e8fa87df9e6377578b6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 22 May 2021 15:06:14 -0400 Subject: [PATCH 573/770] glsl: FP function fixes --- src/shader_recompiler/backend/glsl/emit_context.cpp | 5 ++++- .../backend/glsl/emit_glsl_composite.cpp | 8 +++++--- .../backend/glsl/emit_glsl_context_get_set.cpp | 10 +++++++--- .../backend/glsl/emit_glsl_convert.cpp | 4 ++-- .../backend/glsl/emit_glsl_floating_point.cpp | 8 ++++---- .../backend/glsl/emit_glsl_memory.cpp | 5 ++--- src/shader_recompiler/backend/glsl/reg_alloc.cpp | 2 +- 7 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index b3a3e5647..da379360a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -48,8 +48,11 @@ void EmitContext::DefineStorageBuffers() { } u32 binding{}; for (const auto& desc : info.storage_buffers_descriptors) { - Add("layout(std430,binding={}) buffer buff_{}{{uint buff{}[];}};", binding, binding, + Add("layout(std430,binding={}) buffer ssbo_{}_u32{{uint ssbo{}_u32[];}};", binding, binding, desc.cbuf_index, desc.count); + // TODO: Track ssbo data type usage + Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uvec2 ssbo{}_u64[];}};", binding, + binding, desc.cbuf_index, desc.count); ++binding; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 40b9ca08e..8e7ad68bd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -10,6 +10,8 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +static constexpr std::string_view SWIZZLE{"xyzw"}; + void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2) { ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); @@ -32,7 +34,7 @@ void EmitCompositeConstructU32x4([[maybe_unused]] EmitContext& ctx, void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index) { - ctx.AddU32("{}={}[{}];", inst, composite, index); + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeExtractU32x3([[maybe_unused]] EmitContext& ctx, @@ -130,7 +132,7 @@ void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2) { - ctx.AddF32x2("{}=uvec2({},{});", inst, e1, e2); + ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); } void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, @@ -150,7 +152,7 @@ void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index) { - ctx.AddF32("{}={}[{}];", inst, composite, index); + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeExtractF32x3([[maybe_unused]] 
EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index d3d55562c..7c9cadd7e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -10,6 +10,8 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +static constexpr std::string_view SWIZZLE{"xyzw"}; + void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); @@ -33,14 +35,16 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto u32_offset{offset.U32()}; - ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}][{}]);", inst, binding.U32(), u32_offset / 16, - (u32_offset / 4) % 4); + const auto index{(u32_offset / 4) % 4}; + ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}].{});", inst, binding.U32(), u32_offset / 16, + SWIZZLE[index]); } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto u32_offset{offset.U32()}; - ctx.AddF32("{}=cbuf{}[{}][{}];", inst, binding.U32(), u32_offset / 16, (u32_offset / 4) % 4); + const auto index{(u32_offset / 4) % 4}; + ctx.AddF32("{}=cbuf{}[{}].{};", inst, binding.U32(), u32_offset / 16, SWIZZLE[index]); } void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 7ddc24c71..0f95d4465 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -47,7 +47,7 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertS64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddS64("{}=int64_t({});", inst, value); + ctx.AddS64("{}=int64_t(double({}));", inst, value); } void EmitConvertS64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -92,7 +92,7 @@ void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertU64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU64("{}=uint64_t({});", inst, value); + ctx.AddU64("{}=uint64_t(double({}));", inst, value); } void EmitConvertU64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 3f3a83b20..2aa9f2cd3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -156,12 +156,12 @@ void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=sqrt({});", inst, value); + throw NotImplementedException("GLSL"); } void EmitFPSaturate32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view 
value) { - ctx.AddF32("{}=clamp({},0.0f,1.0f);", inst, value); + ctx.AddF32("{}=clamp({},0.0,1.0);", inst, value); } void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -197,12 +197,12 @@ void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=round({});", inst, value); + ctx.AddF32("{}=roundEven({});", inst, value); } void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=round({});", inst, value); + ctx.AddF64("{}=roundEven({});", inst, value); } void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 7c8c23050..b042ae853 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -78,13 +78,12 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.Add("buff{}[{}]={};", binding.U32(), offset.U32(), value); + ctx.Add("ssbo{}_u32[{}]={};", binding.U32(), offset.U32(), value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.Add("buff{}[{}]={}.x;", binding.U32(), offset.U32(), value); - ctx.Add("buff{}[{}]={}.y;", binding.U32(), offset.U32() + 1, value); + ctx.Add("ssbo{}_u64[{}]={};", binding.U32(), offset.U32(), value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index c370aabb5..73295a1e5 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -36,7 +36,7 @@ std::string MakeImm(const IR::Value& value) { case IR::Type::U64: return fmt::format("{}ul", value.U64()); case IR::Type::F64: - return fmt::format("{}", value.F64()); + return fmt::format("{}lf", value.F64()); default: throw NotImplementedException("Immediate type {}", value.Type()); } From ac7b0ebcb77f6c99d054e9d10412e669eefa0de6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 22 May 2021 23:03:27 -0400 Subject: [PATCH 574/770] glsl: More FP fixes --- .../backend/glsl/emit_context.cpp | 15 ++++++++++----- .../backend/glsl/emit_glsl_floating_point.cpp | 10 ++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index da379360a..67772c46d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -48,11 +48,16 @@ void EmitContext::DefineStorageBuffers() { } u32 binding{}; for (const auto& desc : info.storage_buffers_descriptors) { - Add("layout(std430,binding={}) buffer ssbo_{}_u32{{uint ssbo{}_u32[];}};", binding, binding, - desc.cbuf_index, desc.count); - // TODO: Track ssbo data type usage - Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uvec2 ssbo{}_u64[];}};", binding, - binding, desc.cbuf_index, desc.count); + if (True(info.used_storage_buffer_types & 
IR::Type::U32) || + True(info.used_storage_buffer_types & IR::Type::F32)) { + Add("layout(std430,binding={}) buffer ssbo_{}_u32{{uint ssbo{}_u32[];}};", binding, + binding, desc.cbuf_index, desc.count); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x2) || + True(info.used_storage_buffer_types & IR::Type::F32x2)) { + Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uvec2 ssbo{}_u64[];}};", binding, + binding, desc.cbuf_index, desc.count); + } ++binding; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 2aa9f2cd3..19a3c236d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -161,12 +161,12 @@ void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitFPSaturate32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=clamp({},0.0,1.0);", inst, value); + ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); } void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=clamp({},0.0,1.0);", inst, value); + ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); } void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -180,14 +180,16 @@ void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - ctx.AddF32("{}=clamp({},float({}),float({}));", inst, value, min_value, max_value); + // GLSL's clamp does not produce desirable results + ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); } void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - ctx.AddF64("{}=clamp({},double({}),double({}));", inst, value, min_value, max_value); + // GLSL's clamp does not produce desirable results + ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From cdde7302196d6642724d36e8ed5a523dce702b6b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 22 May 2021 23:31:30 -0400 Subject: [PATCH 575/770] glsl: Add a more robust fp formatter --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 1 - .../backend/glsl/emit_glsl_floating_point.cpp | 4 ++-- .../backend/glsl/emit_glsl_integer.cpp | 4 ++-- src/shader_recompiler/backend/glsl/reg_alloc.cpp | 14 ++++++++++---- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index d1c58cefc..e48f152d0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -146,7 +146,6 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& program, Bindings& bindings) { EmitContext ctx{program, bindings, profile}; - // ctx.SetupBuffers(); EmitCode(ctx, program); ctx.code += "}"; return 
ctx.code; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 19a3c236d..e8c828e7c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -101,12 +101,12 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i void EmitFPNeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=-{};", inst, value); + ctx.AddF32("{}=-({});", inst, value); } void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=-{};", inst, value); + ctx.AddF64("{}=-({});", inst, value); } void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 083d81ccb..681bc1bfa 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -31,11 +31,11 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin } void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=-{};", inst, value); + ctx.AddU32("{}=-({});", inst, value); } void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=-{};", inst, value); + ctx.AddU64("{}=-({});", inst, value); } void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 73295a1e5..007f8c89d 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -10,10 +10,9 @@ #include "shader_recompiler/backend/glsl/reg_alloc.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/value.h" -#pragma optimize("", off) + namespace Shader::Backend::GLSL { namespace { - std::string Representation(Id id) { if (id.is_condition_code != 0) { throw NotImplementedException("Condition code"); @@ -25,6 +24,13 @@ std::string Representation(Id id) { return fmt::format("R{}", index); } +std::string FormatFloat(std::string_view value, IR::Type type) { + const bool needs_dot = value.find_first_of('.') == std::string_view::npos; + const bool needs_suffix = !value.ends_with('f'); + const auto suffix = type == IR::Type::F32 ? "f" : "lf"; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? 
suffix : ""); +} + std::string MakeImm(const IR::Value& value) { switch (value.Type()) { case IR::Type::U1: @@ -32,11 +38,11 @@ std::string MakeImm(const IR::Value& value) { case IR::Type::U32: return fmt::format("{}u", value.U32()); case IR::Type::F32: - return fmt::format("{}f", value.F32()); + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); case IR::Type::U64: return fmt::format("{}ul", value.U64()); case IR::Type::F64: - return fmt::format("{}lf", value.F64()); + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); default: throw NotImplementedException("Immediate type {}", value.Type()); } From df793fc0493a67ca2838ba816232da8409d03c8a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 24 May 2021 00:55:39 -0400 Subject: [PATCH 576/770] glsl: Implement FCMP --- .../backend/glsl/emit_glsl_floating_point.cpp | 418 ++++++++---------- .../backend/glsl/emit_glsl_instructions.h | 35 +- .../backend/glsl/reg_alloc.cpp | 12 + 3 files changed, 204 insertions(+), 261 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index e8c828e7c..665fc1562 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -10,162 +10,151 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +namespace { +void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, + std::string_view op, std::string_view, bool ordered, bool inequality = false) { + ctx.AddU1("{}={}{}{}", inst, lhs, op, rhs, lhs, rhs); + if (ordered && inequality) { + ctx.code += fmt::format("&&!isnan({})&&!isnan({})", lhs, rhs); + } else if (!ordered && !inequality) { + ctx.code += fmt::format("||!isnan({})||!isnan({})", lhs, rhs); + } + ctx.code += ";"; +} +} // namespace void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPAbs32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=abs({});", inst, value); } -void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); +void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF64("{}=abs({});", inst, value); } void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPAdd32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF32("{}=float({})+float({});", inst, a, b); } -void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { 
ctx.AddF64("{}=double({})+double({});", inst, a, b); } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, [[maybe_unused]] std::string_view c) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPFma32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, - [[maybe_unused]] std::string_view c) { +void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); } -void EmitFPFma64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, - [[maybe_unused]] std::string_view c) { +void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + std::string_view c) { ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); } -void EmitFPMax32([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF32("{}=max({},{});", inst, a, b); } -void EmitFPMax64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF64("{}=max({},{});", inst, a, b); } -void EmitFPMin32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF32("{}=min({},{});", inst, a, b); } -void EmitFPMin64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF64("{}=min({},{});", inst, a, b); } void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPMul32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF32("{}={}*{};", inst, a, b); } -void EmitFPMul64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { +void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { ctx.AddF64("{}={}*{};", inst, a, b); } void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPNeg32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view 
value) { ctx.AddF32("{}=-({});", inst, value); } -void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=-({});", inst, value); } -void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=sin({});", inst, value); } -void EmitFPCos([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=cos({});", inst, value); } -void EmitFPExp2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=exp2({});", inst, value); } -void EmitFPLog2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=log2({});", inst, value); } -void EmitFPRecip32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=1/{};", inst, value); } -void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=1/{};", inst, value); } void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPSqrt([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=sqrt({});", inst, value); } void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPSaturate32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); } -void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); } @@ -173,316 +162,269 @@ void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - throw 
NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPClamp32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view min_value, - [[maybe_unused]] std::string_view max_value) { +void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { // GLSL's clamp does not produce desirable results ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); } -void EmitFPClamp64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value, - [[maybe_unused]] std::string_view min_value, - [[maybe_unused]] std::string_view max_value) { +void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view min_value, std::string_view max_value) { // GLSL's clamp does not produce desirable results ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); } void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPRoundEven32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=roundEven({});", inst, value); } -void EmitFPRoundEven64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=roundEven({});", inst, value); } void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPFloor32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=floor({});", inst, value); } -void EmitFPFloor64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=floor({});", inst, value); } void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL Instruction"); } -void EmitFPCeil32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=ceil({});", inst, value); } -void EmitFPCeil64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=ceil({});", inst, value); } void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL 
Instruction"); } -void EmitFPTrunc32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=trunc({});", inst, value); } -void EmitFPTrunc64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=trunc({});", inst, value); } -void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, +void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPOrdEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - ctx.AddU1("{}={}=={};", inst, lhs, rhs); +void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", "F", true); } -void EmitFPOrdEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", "F64", true); } -void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, +void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", "F", false); } -void EmitFPUnordEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "==", "F64", false); } -void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, +void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPOrdNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, +void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", "F", true, true); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { 
+ Compare(ctx, inst, lhs, rhs, "!=", "F64", true, true); +} + +void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, + [[maybe_unused]] std::string_view rhs) { + throw NotImplementedException("GLSL instruction"); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", "F", false, true); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "!=", "F64", false, true); +} + +void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPOrdNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", "F", true); } -void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, +void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", "F64", true); +} + +void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordNotEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", "F", false); } -void EmitFPUnordNotEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<", "F64", false); } -void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPOrdLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPOrdLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPUnordLessThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] 
std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPUnordLessThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, +void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPOrdGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", "F", true); } -void EmitFPOrdGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", "F64", true); } -void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, +void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordGreaterThan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, +void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", "F", false); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">", "F64", false); +} + +void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordGreaterThan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", "F", true); } -void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", "F64", true); } -void EmitFPOrdLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPOrdLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] 
IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); -} - -void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, +void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordLessThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", "F", false); } -void EmitFPUnordLessThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, "<=", "F64", false); } -void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, +void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPOrdGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", "F", true); } -void EmitFPOrdGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", "F64", true); } void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); + throw NotImplementedException("GLSL instruction"); } -void EmitFPUnordGreaterThanEqual32([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", "F", false); } -void EmitFPUnordGreaterThanEqual64([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view lhs, - [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL"); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, + std::string_view rhs) { + Compare(ctx, inst, lhs, rhs, ">=", "F64", false); } void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL instruction"); +} + +void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU1("{}=isnan({});", inst, value); } -void EmitFPIsNan32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { - ctx.AddU1("{}=isnan({});", inst, value); -} - -void EmitFPIsNan64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU1("{}=isnan({});", inst, value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index efa515a3c..4e0487543 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -289,71 +289,60 @@ void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitFPOrdEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdNotEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordNotEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void 
EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThan16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdLessThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordLessThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, - std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 007f8c89d..9f529c358 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -25,6 +25,18 @@ std::string Representation(Id id) { } std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "uintBitsToFloat(0x7fc00000)"; + } + if (value == "inf") { + return "uintBitsToFloat(0x7f800000)"; + } + if (value == "-inf") { + return "uintBitsToFloat(0xff800000)"; + } + } const bool needs_dot = value.find_first_of('.') == std::string_view::npos; const bool needs_suffix = !value.ends_with('f'); const auto suffix = 
type == IR::Type::F32 ? "f" : "lf"; From 3d9ecbe99844c44074c26f2db4db376059f50534 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 24 May 2021 18:35:37 -0400 Subject: [PATCH 577/770] glsl: Wip storage atomic ops --- .../backend/glsl/emit_context.cpp | 42 ++- .../backend/glsl/emit_context.h | 8 +- .../backend/glsl/emit_glsl_atomic.cpp | 301 ++++++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 110 ++++--- .../backend/glsl/emit_glsl_memory.cpp | 8 +- .../glsl/emit_glsl_not_implemented.cpp | 253 +-------------- .../backend/glsl/reg_alloc.cpp | 6 + .../backend/glsl/reg_alloc.h | 3 +- .../ir_opt/collect_shader_info_pass.cpp | 9 +- src/shader_recompiler/shader_info.h | 1 + 10 files changed, 414 insertions(+), 327 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 67772c46d..3c610a08a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -19,8 +19,10 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin program.workgroup_size[2]); } code += header; + DefineConstantBuffers(); DefineStorageBuffers(); + DefineHelperFunctions(); code += "void main(){\n"; } @@ -28,6 +30,15 @@ void EmitContext::SetupExtensions(std::string& header) { if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } + if (info.uses_int64_bit_atomics) { + header += "#extension GL_NV_shader_atomic_int64 : enable\n"; + } + if (info.uses_atomic_f32_add) { + header += "#extension GL_NV_shader_atomic_float : enable\n"; + } + if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; + } } void EmitContext::DefineConstantBuffers() { @@ -48,18 +59,39 @@ void EmitContext::DefineStorageBuffers() { } u32 binding{}; for (const auto& desc : info.storage_buffers_descriptors) { - if (True(info.used_storage_buffer_types & IR::Type::U32) || - True(info.used_storage_buffer_types & IR::Type::F32)) { + if (info.uses_s32_atomics) { + Add("layout(std430,binding={}) buffer ssbo_{}_s32{{int ssbo{}_s32[];}};", binding, + binding, desc.cbuf_index, desc.count); + } + if (True(info.used_storage_buffer_types & IR::Type::U32)) { Add("layout(std430,binding={}) buffer ssbo_{}_u32{{uint ssbo{}_u32[];}};", binding, binding, desc.cbuf_index, desc.count); } - if (True(info.used_storage_buffer_types & IR::Type::U32x2) || - True(info.used_storage_buffer_types & IR::Type::F32x2)) { - Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uvec2 ssbo{}_u64[];}};", binding, + if (True(info.used_storage_buffer_types & IR::Type::F32)) { + Add("layout(std430,binding={}) buffer ssbo_{}_f32{{float ssbo{}_f32[];}};", binding, + binding, desc.cbuf_index, desc.count); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + Add("layout(std430,binding={}) buffer ssbo_{}_u32x2{{uvec2 ssbo{}_u32x2[];}};", binding, + binding, desc.cbuf_index, desc.count); + } + if (True(info.used_storage_buffer_types & IR::Type::U64) || + True(info.used_storage_buffer_types & IR::Type::F64)) { + Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uint64_t ssbo{}_u64[];}};", binding, binding, desc.cbuf_index, desc.count); } ++binding; } } +void EmitContext::DefineHelperFunctions() { + if (info.uses_global_increment) { + code += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; + } + if 
(info.uses_global_decrement) { + code += + "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; + } +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9472f71b9..ca5657fe7 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -31,13 +31,6 @@ class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); - // template <typename... Args> - // void Add(const char* format_str, IR::Inst& inst, Args&&... args) { - // code += fmt::format(format_str, reg_alloc.Define(inst), std::forward<Args>(args)...); - // // TODO: Remove this - // code += '\n'; - // } - template <Type type, typename... Args> void Add(const char* format_str, IR::Inst& inst, Args&&... args) { code += fmt::format(format_str, reg_alloc.Define(inst, type), std::forward<Args>(args)...); @@ -106,6 +99,7 @@ private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); void DefineStorageBuffers(); + void DefineHelperFunctions(); }; } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index e69de29bb..f3ef37873 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -0,0 +1,301 @@ + +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +namespace { +static constexpr std::string_view cas_loop{R"( +{} {}; +for (;;){{ + {} old_value={}; + {} = atomicCompSwap({},old_value,{}({},{})); + if ({}==old_value){{break;}} +}})"}; + +void CasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view ssbo, std::string_view value, + std::string_view type, std::string_view function) { + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add(cas_loop.data(), type, ret, type, ssbo, ret, ssbo, function, ssbo, value, ret); +} +} // namespace + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAdd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddS32("{}=atomicMin(ssbo{}_s32[{}],int({}));", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMin(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddS32("{}=atomicMax(ssbo{}_s32[{}],int({}));", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicMax(ssbo{}_u32[{}],{});", inst, binding.U32(), 
offset.U32(), value); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, std::string_view value) { + // const auto ret{ctx.reg_alloc.Define(inst)}; + // const auto type{"uint"}; + // ctx.Add(cas_loop.data(), type, ret, type, ssbo, ret, ssbo, "CasIncrement", ssbo, value, ret); + const std::string ssbo{fmt::format("ssbo{}_u32[{}]", binding.U32(), offset.U32())}; + CasFunction(ctx, inst, ssbo, value, "uint", "CasIncrement"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + const std::string ssbo{fmt::format("ssbo{}_u32[{}]", binding.U32(), offset.U32())}; + CasFunction(ctx, inst, ssbo, value, "uint", "CasDecrement"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicAnd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicOr(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicXor(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU32("{}=atomicExchange(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + // ctx.AddU64("{}=atomicAdd(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU64("{}=ssbo{}_u64[{}];", inst, binding.U32(), offset.U32()); + ctx.Add("ssbo{}_u64[{}]+={};", binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddS64("{}=atomicMin(int64_t(ssbo{}_u64[{}]),int64_t({}));", inst, binding.U32(), + offset.U32(), value); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=atomicMin(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddS64("{}=atomicMax(int64_t(ssbo{}_u64[{}]),int64_t({}));", inst, binding.U32(), + offset.U32(), value); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=atomicMax(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=atomicAnd(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + 
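// Note: the 64-bit atomic intrinsics emitted in these functions assume GL_NV_shader_atomic_int64, + // which SetupExtensions requests when info.uses_int64_bit_atomics is set; without that extension + // the generated GLSL will not compile and these ops would need a different lowering. + 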
ctx.AddU64("{}=atomicOr(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=atomicXor(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddU64("{}=atomicExchange(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + ctx.AddF32("{}=atomicAdd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); +} + +void EmitStorageAtomicAddF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitStorageAtomicMinF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitStorageAtomicMaxF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] std::string_view value) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw 
NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4e0487543..c2836898f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -15,10 +15,7 @@ class Inst; class Value; } // namespace Shader::IR -#pragma optimize("", off) - namespace Shader::Backend::GLSL { - class EmitContext; inline void EmitSetLoopSafetyVariable(EmitContext&) {} @@ -114,7 +111,8 @@ void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Val void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, @@ -431,60 +429,60 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offse std::string_view value); void 
EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, std::string_view value); -void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); -void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicUMax64(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); -void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); -void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view 
value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); void EmitGlobalAtomicIAdd32(EmitContext& ctx); void EmitGlobalAtomicSMin32(EmitContext& ctx); void EmitGlobalAtomicUMin32(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index b042ae853..d1e6f074d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -32,9 +32,9 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("GLSL Instrucion"); } -void EmitLoadStorage32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); +void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + ctx.AddU32("{}=ssbo{}_u32[{}];", inst, binding.U32(), offset.U32()); } void EmitLoadStorage64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, @@ -83,7 +83,7 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.Add("ssbo{}_u64[{}]={};", binding.U32(), offset.U32(), value); + ctx.Add("ssbo{}_u32x2[{}]={};", binding.U32(), offset.U32(), value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index cb6562ebf..65eccaece 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -20,7 +20,7 @@ static void NotImplemented() { } void EmitPhi(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); + // NotImplemented(); } void EmitVoid(EmitContext& ctx) { @@ -439,257 +439,6 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offse NotImplemented(); } -void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - 
std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - std::string_view value) { - NotImplemented(); -} - -void EmitGlobalAtomicIAdd32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMin32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMin32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMax32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMax32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicInc32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicDec32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAnd32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicOr32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicXor32(EmitContext& ctx) { - NotImplemented(); -} - -void 
EmitGlobalAtomicExchange32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicIAdd64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMin64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMin64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicSMax64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicUMax64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicInc64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicDec64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAnd64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicOr64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicXor64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicExchange64(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF32(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMinF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { - NotImplemented(); -} - void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 9f529c358..8db1391fd 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -61,6 +61,12 @@ std::string MakeImm(const IR::Value& value) { } } // Anonymous namespace +std::string RegAlloc::Define(IR::Inst& inst) { + const Id id{Alloc()}; + inst.SetDefinition(id); + return Representation(id); +} + std::string RegAlloc::Define(IR::Inst& inst, Type type) { const Id id{Alloc()}; const auto type_str{GetType(type, id.index)}; diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 63c940d3a..7891c30e0 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -48,7 +48,8 @@ static_assert(sizeof(Id) == sizeof(u32)); class RegAlloc { public: - std::string Define(IR::Inst& inst, Type type = Type::Void); + std::string Define(IR::Inst& inst); + std::string Define(IR::Inst& inst, Type type); std::string Consume(const IR::Value& value); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fb2031fc8..884ade004 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,9 +687,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: - case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: - case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -714,6 +712,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SharedAtomicExchange64: info.uses_int64_bit_atomics = true; break; + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicSMax32: + case 
IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_s32_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: info.used_storage_buffer_types |= IR::Type::U32; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 32f8a50ea..be05eafcf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -190,6 +190,7 @@ struct Info { bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; bool uses_int64_bit_atomics{}; + bool uses_s32_atomics{}; bool uses_global_memory{}; bool uses_atomic_image_u32{}; From e99d01ff5308bb239aa2007ba4363d3a77f4d202 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 24 May 2021 19:33:11 -0400 Subject: [PATCH 578/770] glsl: implement phi nodes --- .../backend/glsl/emit_glsl.cpp | 48 ++++++++++++------- .../glsl/emit_glsl_not_implemented.cpp | 7 ++- .../backend/glsl/reg_alloc.cpp | 17 +++++++ .../backend/glsl/reg_alloc.h | 2 + 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index e48f152d0..e5aaf81a7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <ranges> #include <string> @@ -9,6 +10,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -96,6 +98,22 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } +void Precolor(EmitContext& ctx, const IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.PhiMove(phi, phi.Arg(i)); + } + // Add reference to the phi node on the phi predecessor to avoid overwriting it + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } + } + } +} + void EmitCode(EmitContext& ctx, const IR::Program& program) { for (const IR::AbstractSyntaxNode& node : program.syntax_list) { switch (node.type) { @@ -105,37 +123,31 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } break; case IR::AbstractSyntaxNode::Type::If: - ctx.Add("if ("); + ctx.Add("if ({}){{", ctx.reg_alloc.Consume(node.data.if_node.cond)); break; case IR::AbstractSyntaxNode::Type::EndIf: - ctx.Add("){{"); - break; - case IR::AbstractSyntaxNode::Type::Loop: - ctx.Add("while ("); - break; - case IR::AbstractSyntaxNode::Type::Repeat: - if (node.data.repeat.cond.IsImmediate()) { - if (node.data.repeat.cond.U1()) { - ctx.Add("ENDREP;"); - } else { - ctx.Add("BRK;" - "ENDREP;"); - } - } + ctx.Add("}}"); break; case IR::AbstractSyntaxNode::Type::Break: if (node.data.break_node.cond.IsImmediate()) { if (node.data.break_node.cond.U1()) { ctx.Add("break;"); } + } else { + // TODO: implement this +
ctx.Add("MOV.S.CC RC,{};" + "BRK (NE.x);", + 0); } break; case IR::AbstractSyntaxNode::Type::Return: case IR::AbstractSyntaxNode::Type::Unreachable: - ctx.Add("return;"); + ctx.Add("return;\n}}"); break; + case IR::AbstractSyntaxNode::Type::Loop: + case IR::AbstractSyntaxNode::Type::Repeat: default: - ctx.Add("UNAHNDLED {}", node.type); + throw NotImplementedException("{}", node.type); break; } } @@ -146,8 +158,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& program, Bindings& bindings) { EmitContext ctx{program, bindings, profile}; + Precolor(ctx, program); EmitCode(ctx, program); - ctx.code += "}"; return ctx.code; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 65eccaece..d67a1d81f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -28,11 +28,14 @@ void EmitVoid(EmitContext& ctx) { } void EmitReference(EmitContext&) { - NotImplemented(); + // NotImplemented(); } void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { - NotImplemented(); + if (phi == value) { + return; + } + ctx.Add("{}={};", ctx.reg_alloc.Consume(phi), ctx.reg_alloc.Consume(value)); } void EmitBranch(EmitContext& ctx, std::string_view label) { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 8db1391fd..58c2c408e 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -74,6 +74,23 @@ std::string RegAlloc::Define(IR::Inst& inst, Type type) { return type_str + Representation(id); } +std::string RegAlloc::Define(IR::Inst& inst, IR::Type type) { + switch (type) { + case IR::Type::U1: + return Define(inst, Type::U1); + case IR::Type::U32: + return Define(inst, Type::U32); + case IR::Type::F32: + return Define(inst, Type::F32); + case IR::Type::U64: + return Define(inst, Type::U64); + case IR::Type::F64: + return Define(inst, Type::F64); + default: + throw NotImplementedException("IR type {}", type); + } +} + std::string RegAlloc::Consume(const IR::Value& value) { return value.IsImmediate() ? MakeImm(value) : Consume(*value.InstRecursive()); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 7891c30e0..581954e44 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -12,6 +12,7 @@ namespace Shader::IR { class Inst; class Value; +enum class Type; } // namespace Shader::IR namespace Shader::Backend::GLSL { @@ -50,6 +51,7 @@ class RegAlloc { public: std::string Define(IR::Inst& inst); std::string Define(IR::Inst& inst, Type type); + std::string Define(IR::Inst& inst, IR::Type type); std::string Consume(const IR::Value& value); From 11ba190462c7b69a47598b2d1572fac3bccc4adc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 01:35:30 -0400 Subject: [PATCH 579/770] glsl: Revert ssbo aliasing. 
Storage Atomics impl --- .../backend/glsl/emit_context.cpp | 42 +++--- .../backend/glsl/emit_glsl_atomic.cpp | 141 +++++++++++++----- .../backend/glsl/emit_glsl_instructions.h | 3 +- .../backend/glsl/emit_glsl_memory.cpp | 14 +- .../ir_opt/collect_shader_info_pass.cpp | 9 +- 5 files changed, 134 insertions(+), 75 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 3c610a08a..7986bf78f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -59,27 +59,8 @@ void EmitContext::DefineStorageBuffers() { } u32 binding{}; for (const auto& desc : info.storage_buffers_descriptors) { - if (info.uses_s32_atomics) { - Add("layout(std430,binding={}) buffer ssbo_{}_s32{{int ssbo{}_s32[];}};", binding, - binding, desc.cbuf_index, desc.count); - } - if (True(info.used_storage_buffer_types & IR::Type::U32)) { - Add("layout(std430,binding={}) buffer ssbo_{}_u32{{uint ssbo{}_u32[];}};", binding, - binding, desc.cbuf_index, desc.count); - } - if (True(info.used_storage_buffer_types & IR::Type::F32)) { - Add("layout(std430,binding={}) buffer ssbo_{}_f32{{float ssbo{}_f32[];}};", binding, - binding, desc.cbuf_index, desc.count); - } - if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { - Add("layout(std430,binding={}) buffer ssbo_{}_u32x2{{uvec2 ssbo{}_u32x2[];}};", binding, - binding, desc.cbuf_index, desc.count); - } - if (True(info.used_storage_buffer_types & IR::Type::U64) || - True(info.used_storage_buffer_types & IR::Type::F64)) { - Add("layout(std430,binding={}) buffer ssbo_{}_u64{{uint64_t ssbo{}_u64[];}};", binding, - binding, desc.cbuf_index, desc.count); - } + Add("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", binding, binding, + desc.cbuf_index, desc.count); ++binding; } } @@ -92,6 +73,25 @@ void EmitContext::DefineHelperFunctions() { code += "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } + if (info.uses_atomic_f32_add) { + code += "uint CasFloatAdd(uint op_a,uint op_b){return " + "floatBitsToUint(uintBitsToFloat(op_a)+uintBitsToFloat(op_b));}\n"; + } + if (info.uses_atomic_f32x2_add) { + code += "uint CasFloatAdd32x2(uint op_a,uint op_b){return " + "packHalf2x16(unpackHalf2x16(op_a)+unpackHalf2x16(op_b));}\n"; + } + if (info.uses_atomic_f32x2_min) { + code += "uint CasFloatMin32x2(uint op_a,uint op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; + } + if (info.uses_atomic_f32x2_max) { + code += "uint CasFloatMax32x2(uint op_a,uint op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; + } + // TODO: Track this usage + code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index f3ef37873..0b29c213b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -13,132 +13,193 @@ namespace Shader::Backend::GLSL { namespace { static constexpr std::string_view cas_loop{R"( -{} {}; +uint {}; for (;;){{ - {} old_value={}; - {} = atomicCompSwap({},old_value,{}({},{})); + uint old_value={}; + {}=atomicCompSwap({},old_value,{}({},{})); if ({}==old_value){{break;}} }})"}; 
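The cas_loop template just above is the backend's compare-and-swap emulation for read-modify-write atomics that GLSL does not provide natively. Each expansion snapshots the target storage word, computes the replacement through a small helper function, and tries to publish it with atomicCompSwap, retrying until no other invocation modified the word in the meantime; atomicCompSwap returns the previous memory contents, so the loop exits once that return value equals the snapshot. As a rough sketch only, using hypothetical names (result register t0, target word ssbo0[4], operand v) and the CasIncrement helper from DefineHelperFunctions, one expansion would emit GLSL along these lines:

    uint t0;
    for (;;){
        uint old_value=ssbo0[4];
        t0=atomicCompSwap(ssbo0[4],old_value,CasIncrement(ssbo0[4],v));
        if (t0==old_value){break;}
    }

Note that the helper's first argument re-reads the storage word instead of reusing old_value; in the common case a racing write just forces one more iteration, because the swap itself is still guarded by the comparison against old_value.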
-void CasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view ssbo, std::string_view value, - std::string_view type, std::string_view function) { +void CasFunction(EmitContext& ctx, std::string_view ret, std::string_view ssbo, + std::string_view value, std::string_view function) { + ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); +} + +void CasFunctionInt32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { const auto ret{ctx.reg_alloc.Define(inst)}; - ctx.Add(cas_loop.data(), type, ret, type, ssbo, ret, ssbo, function, ssbo, value, ret); + const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + CasFunction(ctx, ret, ssbo, value, function); +} + +void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string u32_value{fmt::format("floatBitsToUint({})", value)}; + const auto ret{ctx.reg_alloc.Define(inst)}; + const auto ret_32{ret + "_u32"}; + CasFunction(ctx, ret_32, ssbo, u32_value, function); + ctx.Add("float {}=uintBitsToFloat({});", ret, ret_32); +} + +void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string u32_value{fmt::format("packHalf2x16({})", value)}; + const auto ret{ctx.reg_alloc.Define(inst)}; + CasFunction(ctx, ret, ssbo, u32_value, function); } } // namespace void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAdd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicAdd(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddS32("{}=atomicMin(ssbo{}_s32[{}],int({}));", inst, binding.U32(), offset.U32(), value); + const std::string u32_value{fmt::format("uint({})", value)}; + CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMinS32"); } void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMin(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicMin(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddS32("{}=atomicMax(ssbo{}_s32[{}],int({}));", inst, binding.U32(), offset.U32(), value); + const std::string u32_value{fmt::format("uint({})", value)}; + CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMaxS32"); } void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMax(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicMax(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - 
[[maybe_unused]] const IR::Value& offset, std::string_view value) { - // const auto ret{ctx.reg_alloc.Define(inst)}; - // const auto type{"uint"}; - // ctx.Add(cas_loop.data(), type, ret, type, ssbo, ret, ssbo, "CasIncrement", ssbo, value, ret); - const std::string ssbo{fmt::format("ssbo{}_u32[{}]", binding.U32(), offset.U32())}; - CasFunction(ctx, inst, ssbo, value, "uint", "CasIncrement"); + const IR::Value& offset, std::string_view value) { + CasFunctionInt32(ctx, inst, binding, offset, value, "CasIncrement"); } void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - const std::string ssbo{fmt::format("ssbo{}_u32[{}]", binding.U32(), offset.U32())}; - CasFunction(ctx, inst, ssbo, value, "uint", "CasDecrement"); + CasFunctionInt32(ctx, inst, binding, offset, value, "CasDecrement"); } void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAnd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicAnd(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicOr(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicOr(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicXor(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicXor(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicExchange(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicExchange(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); } void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // ctx.AddU64("{}=atomicAdd(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); - ctx.AddU64("{}=ssbo{}_u64[{}];", inst, binding.U32(), offset.U32()); - ctx.Add("ssbo{}_u64[{}]+={};", binding.U32(), offset.U32(), value); + // LOG_WARNING(..., "Op falling to non-atomic"); + ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); + ctx.Add("ssbo{}[{}]+=unpackUint2x32({}).x;ssbo{}[{}]+=unpackUint2x32({}).y;", binding.U32(), + offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddS64("{}=atomicMin(int64_t(ssbo{}_u64[{}]),int64_t({}));", inst, binding.U32(), - offset.U32(), value); + // LOG_WARNING(..., "Op falling to non-atomic"); + ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); + ctx.Add(R"( +for(int i=0;i<2;++i){{ +ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i])); +}} +)", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void 
EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicMin(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + // LOG_WARNING(..., "Op falling to non-atomic"); + ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); + ctx.Add(R"( +for(int i=0;i<2;++i){{ +ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); +}} +)", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddS64("{}=atomicMax(int64_t(ssbo{}_u64[{}]),int64_t({}));", inst, binding.U32(), - offset.U32(), value); + // LOG_WARNING(..., "Op falling to non-atomic"); + ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); + ctx.Add(R"( +for(int i=0;i<2;++i){{ +ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i])); +}} +)", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicMax(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + // LOG_WARNING(..., "Op falling to non-atomic"); + ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); + ctx.Add(R"( +for(int i=0;i<2;++i){{ +ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); +}} +)", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicAnd(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU64("{}=uint64_t(uvec2(atomicAnd(ssbo{}[{}],unpackUint2x32({}).x),atomicAnd(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicOr(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU64("{}=uint64_t(uvec2(atomicOr(ssbo{}[{}],unpackUint2x32({}).x),atomicOr(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicXor(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU64("{}=uint64_t(uvec2(atomicXor(ssbo{}[{}],unpackUint2x32({}).x),atomicXor(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=atomicExchange(ssbo{}_u64[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU64("{}=uint64_t(uvec2(atomicExchange(ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" + "ssbo{}[{}],unpackUint2x32({}).y)));", + inst, binding.U32(), 
offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddF32("{}=atomicAdd(ssbo{}_u32[{}],{});", inst, binding.U32(), offset.U32(), value); + CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); } void EmitStorageAtomicAddF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -152,7 +213,7 @@ void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); } void EmitStorageAtomicMinF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -166,7 +227,7 @@ void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2"); } void EmitStorageAtomicMaxF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -180,7 +241,7 @@ void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2"); } void EmitGlobalAtomicIAdd32(EmitContext&) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index c2836898f..56b812d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -113,7 +113,8 @@ void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Va void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index d1e6f074d..8994c02a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -34,12 +34,13 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - ctx.AddU32("{}=ssbo{}_u32[{}];", inst, binding.U32(), offset.U32()); + ctx.AddU32("{}=ssbo{}[{}];", inst, binding.U32(), offset.U32()); } -void EmitLoadStorage64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& 
offset) { - throw NotImplementedException("GLSL Instrucion"); +void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + ctx.AddU32x2("{}=uvec2(ssbo{}[{}],ssbo{}[{}]);", inst, binding.U32(), offset.U32(), + binding.U32(), offset.U32() + 1); } void EmitLoadStorage128([[maybe_unused]] EmitContext& ctx, @@ -78,12 +79,13 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.Add("ssbo{}_u32[{}]={};", binding.U32(), offset.U32(), value); + ctx.Add("ssbo{}[{}]={};", binding.U32(), offset.U32(), value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.Add("ssbo{}_u32x2[{}]={};", binding.U32(), offset.U32(), value); + ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset.U32(), value); + ctx.Add("ssbo{}[{}]={}.y;", binding.U32(), offset.U32() + 1, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 884ade004..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,7 +687,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -712,13 +714,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SharedAtomicExchange64: info.uses_int64_bit_atomics = true; break; - case IR::Opcode::GlobalAtomicSMin32: - case IR::Opcode::GlobalAtomicSMax32: - case IR::Opcode::StorageAtomicSMin32: - case IR::Opcode::StorageAtomicSMax32: - info.used_storage_buffer_types |= IR::Type::U32; - info.uses_s32_atomics = true; - break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: info.used_storage_buffer_types |= IR::Type::U32; From 9cc1b8a873196dac5a97368df125816b5b195777 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 01:52:02 -0400 Subject: [PATCH 580/770] glsl: F16x2 storage atomics --- .../backend/glsl/emit_context.cpp | 16 ++++ .../backend/glsl/emit_context.h | 5 ++ .../backend/glsl/emit_glsl_atomic.cpp | 85 ++++++++----------- .../glsl/emit_glsl_bitwise_conversion.cpp | 9 +- .../backend/glsl/emit_glsl_instructions.h | 4 +- .../backend/glsl/reg_alloc.cpp | 2 + .../backend/glsl/reg_alloc.h | 1 + 7 files changed, 64 insertions(+), 58 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7986bf78f..a413219e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -39,6 +39,10 @@ void EmitContext::SetupExtensions(std::string& header) { if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "#extension NV_shader_atomic_fp16_vector : enable\n"; } + if (info.uses_fp16) { + // TODO: AMD + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } } void EmitContext::DefineConstantBuffers() { @@ -89,6 +93,18 
@@ void EmitContext::DefineHelperFunctions() { code += "uint CasFloatMax32x2(uint op_a,uint op_b){return " "packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; } + if (info.uses_atomic_f16x2_add) { + code += "uint CasFloatAdd16x2(uint op_a,uint op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+unpackFloat2x16(op_b));}\n"; + } + if (info.uses_atomic_f16x2_min) { + code += "uint CasFloatMin16x2(uint op_a,uint op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + } + if (info.uses_atomic_f16x2_max) { + code += "uint CasFloatMax16x2(uint op_a,uint op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + } // TODO: Track this usage code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index ca5657fe7..7f8857fa7 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -43,6 +43,11 @@ public: Add(format_str, inst, args...); } + template <typename... Args> + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add<Type::F16x2>(format_str, inst, args...); + } + template <typename... Args> void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { Add<Type::U32>(format_str, inst, args...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 0b29c213b..b6b326762 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -12,8 +12,7 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view cas_loop{R"( -uint {}; +static constexpr std::string_view cas_loop{R"(uint {}; for (;;){{ uint old_value={}; {}=atomicCompSwap({},old_value,{}({},{})); @@ -49,6 +48,14 @@ void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const auto ret{ctx.reg_alloc.Define(inst)}; CasFunction(ctx, ret, ssbo, u32_value, function); } + +void CasFunctionF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string u32_value{fmt::format("packFloat2x16({})", value)}; + const auto ret{ctx.reg_alloc.Define(inst)}; + CasFunction(ctx, ret, ssbo, u32_value, function); +} } // namespace void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -122,11 +129,8 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(),
offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); -}} -)", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + ctx.Add( + "for(int i=0;i<2;++i){{ ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -148,11 +149,8 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i])); -}} -)", + ctx.Add("for(int i=0;i<2;++i){{ " + "ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } @@ -161,12 +159,9 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); -}} -)", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + ctx.Add( + "for(int i=0;i<2;++i){{ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -202,45 +197,33 @@ void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); } -void EmitStorageAtomicAddF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); } -void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); } -void EmitStorageAtomicMinF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMin16x2"); } -void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - 
[[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2"); } -void EmitStorageAtomicMaxF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMax16x2"); } -void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index a1e97b4cb..742f394d4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -62,13 +62,12 @@ void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); } -void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); } -void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); } void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 56b812d84..9f32070b0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -224,8 +224,8 @@ void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void 
EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 58c2c408e..3de4c1172 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -113,6 +113,8 @@ std::string RegAlloc::GetType(Type type, u32 index) { switch (type) { case Type::U1: return "bool "; + case Type::F16x2: + return "f16vec2 "; case Type::U32: return "uint "; case Type::S32: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 581954e44..9f2ff8718 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -18,6 +18,7 @@ enum class Type; namespace Shader::Backend::GLSL { enum class Type : u32 { U1, + F16x2, S32, U32, F32, From 3482df1176203b4999353e8266f42032536b561c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 02:01:32 -0400 Subject: [PATCH 581/770] glsl: Simplify FP storage atomics --- .../backend/glsl/emit_context.cpp | 28 +++++------ .../backend/glsl/emit_glsl_atomic.cpp | 48 ++++++------------- 2 files changed, 28 insertions(+), 48 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index a413219e3..9c3fd44ba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -78,32 +78,32 @@ void EmitContext::DefineHelperFunctions() { "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } if (info.uses_atomic_f32_add) { - code += "uint CasFloatAdd(uint op_a,uint op_b){return " - "floatBitsToUint(uintBitsToFloat(op_a)+uintBitsToFloat(op_b));}\n"; + code += "uint CasFloatAdd(uint op_a,float op_b){return " + "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; } if (info.uses_atomic_f32x2_add) { - code += "uint CasFloatAdd32x2(uint op_a,uint op_b){return " - "packHalf2x16(unpackHalf2x16(op_a)+unpackHalf2x16(op_b));}\n"; + code += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; } if (info.uses_atomic_f32x2_min) { - code += "uint CasFloatMin32x2(uint op_a,uint op_b){return " - "packHalf2x16(min(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; + code += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f32x2_max) { - code += "uint CasFloatMax32x2(uint op_a,uint op_b){return " - "packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; + code += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f16x2_add) { - code += "uint CasFloatAdd16x2(uint op_a,uint op_b){return " - "packFloat2x16(unpackFloat2x16(op_a)+unpackFloat2x16(op_b));}\n"; + code += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; } if (info.uses_atomic_f16x2_min) { - code += "uint CasFloatMin16x2(uint op_a,uint op_b){return " - "packFloat2x16(min(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + code += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f16x2_max) { - code += "uint CasFloatMax16x2(uint op_a,uint op_b){return " -
"packFloat2x16(max(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + code += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; } // TODO: Track this usage code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index b6b326762..0c3af75f5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -19,16 +19,11 @@ for (;;){{ if ({}==old_value){{break;}} }})"}; -void CasFunction(EmitContext& ctx, std::string_view ret, std::string_view ssbo, - std::string_view value, std::string_view function) { - ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); -} - -void CasFunctionInt32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset, std::string_view value, std::string_view function) { +void CasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { const auto ret{ctx.reg_alloc.Define(inst)}; const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - CasFunction(ctx, ret, ssbo, value, function); + ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); } void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -37,25 +32,10 @@ void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const std::string u32_value{fmt::format("floatBitsToUint({})", value)}; const auto ret{ctx.reg_alloc.Define(inst)}; const auto ret_32{ret + "_u32"}; - CasFunction(ctx, ret_32, ssbo, u32_value, function); + ctx.Add(cas_loop.data(), ret_32, ssbo, ret_32, ssbo, function, ssbo, value, ret_32); ctx.Add("float {}=uintBitsToFloat({});", ret, ret_32); } -void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset, std::string_view value, std::string_view function) { - const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - const std::string u32_value{fmt::format("packHalf2x16({})", value)}; - const auto ret{ctx.reg_alloc.Define(inst)}; - CasFunction(ctx, ret, ssbo, u32_value, function); -} - -void CasFunctionF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset, std::string_view value, std::string_view function) { - const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - const std::string u32_value{fmt::format("packFloat2x16({})", value)}; - const auto ret{ctx.reg_alloc.Define(inst)}; - CasFunction(ctx, ret, ssbo, u32_value, function); -} } // namespace void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -66,7 +46,7 @@ void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const std::string u32_value{fmt::format("uint({})", value)}; - CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMinS32"); + CasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); } void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -77,7 +57,7 @@ void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void 
EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const std::string u32_value{fmt::format("uint({})", value)}; - CasFunctionInt32(ctx, inst, binding, offset, u32_value, "CasMaxS32"); + CasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); } void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -87,12 +67,12 @@ void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionInt32(ctx, inst, binding, offset, value, "CasIncrement"); + CasFunction(ctx, inst, binding, offset, value, "CasIncrement"); } void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionInt32(ctx, inst, binding, offset, value, "CasDecrement"); + CasFunction(ctx, inst, binding, offset, value, "CasDecrement"); } void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -199,32 +179,32 @@ void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); } void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); } void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMin16x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); } void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); } void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMax16x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); } void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2"); + CasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); } void EmitGlobalAtomicIAdd32(EmitContext&) { From bd24fa97138ff1e33a7f8d3c30a4f4482a6482a8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 19:55:40 -0400 Subject: [PATCH 582/770] glsl: Query GL Device for FP16 extension support --- src/shader_recompiler/backend/glsl/emit_context.cpp | 9 +++++++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_device.cpp | 2 ++ src/video_core/renderer_opengl/gl_device.h | 10 ++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 
5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 9c3fd44ba..6f769fa10 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { @@ -40,8 +41,12 @@ void EmitContext::SetupExtensions(std::string& header) { header += "#extension NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { - // TODO: AMD - header += "#extension GL_NV_gpu_shader5 : enable\n"; + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3109fb69c..5d269368a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -83,6 +83,8 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_gl_nv_gpu_shader_5{}; + bool support_gl_amd_gpu_shader_half_float{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f7929f9e..071133781 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -158,6 +158,8 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1ffd24883..9b9402c29 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,14 @@ public: return has_depth_buffer_float; } + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -151,6 +159,8 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_nv_gpu_shader_5{}; + bool has_amd_shader_half_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 602cf025b..e00d01e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -217,6 +217,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + 
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .warp_size_potentially_larger_than_guest = true, From 67f881e714ca5bd75c7f19f33e4d80352fad57c1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 20:54:34 -0400 Subject: [PATCH 583/770] glsl: Fix floating point compare ops Logic for ordered/unordered ops was wrong. --- .../backend/glsl/emit_glsl_floating_point.cpp | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 665fc1562..f3d1d1af0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -12,12 +12,12 @@ namespace Shader::Backend::GLSL { namespace { void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, - std::string_view op, std::string_view, bool ordered, bool inequality = false) { + std::string_view op, bool ordered) { ctx.AddU1("{}={}{}{}", inst, lhs, op, rhs, lhs, rhs); - if (ordered && inequality) { + if (ordered) { ctx.code += fmt::format("&&!isnan({})&&!isnan({})", lhs, rhs); - } else if (!ordered && !inequality) { - ctx.code += fmt::format("||!isnan({})||!isnan({})", lhs, rhs); + } else { + ctx.code += fmt::format("||isnan({})||isnan({})", lhs, rhs); } ctx.code += ";"; } @@ -236,12 +236,12 @@ void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::s void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "==", "F", true); + Compare(ctx, inst, lhs, rhs, "==", true); } void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "==", "F64", true); + Compare(ctx, inst, lhs, rhs, "==", true); } void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, @@ -251,12 +251,12 @@ void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std: void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "==", "F", false); + Compare(ctx, inst, lhs, rhs, "==", false); } void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "==", "F64", false); + Compare(ctx, inst, lhs, rhs, "==", false); } void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, @@ -266,12 +266,12 @@ void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "!=", "F", true, true); + Compare(ctx, inst, lhs, rhs, "!=", true); } void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "!=", "F64", true, true); + Compare(ctx, inst, lhs, rhs, "!=", true); } void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, @@ -281,12 +281,12 @@ void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] s void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "!=", "F", 
false, true); + Compare(ctx, inst, lhs, rhs, "!=", false); } void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "!=", "F64", false, true); + Compare(ctx, inst, lhs, rhs, "!=", false); } void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, @@ -296,12 +296,12 @@ void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<", "F", true); + Compare(ctx, inst, lhs, rhs, "<", true); } void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<", "F64", true); + Compare(ctx, inst, lhs, rhs, "<", true); } void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, @@ -311,12 +311,12 @@ void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] s void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<", "F", false); + Compare(ctx, inst, lhs, rhs, "<", false); } void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<", "F64", false); + Compare(ctx, inst, lhs, rhs, "<", false); } void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, @@ -327,12 +327,12 @@ void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">", "F", true); + Compare(ctx, inst, lhs, rhs, ">", true); } void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">", "F64", true); + Compare(ctx, inst, lhs, rhs, ">", true); } void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, @@ -343,12 +343,12 @@ void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">", "F", false); + Compare(ctx, inst, lhs, rhs, ">", false); } void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">", "F64", false); + Compare(ctx, inst, lhs, rhs, ">", false); } void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, @@ -359,12 +359,12 @@ void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<=", "F", true); + Compare(ctx, inst, lhs, rhs, "<=", true); } void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<=", "F64", true); + Compare(ctx, inst, lhs, rhs, "<=", true); } void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, @@ -375,12 +375,12 @@ void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<=", "F", false); + Compare(ctx, inst, lhs, rhs, "<=", false); } void 
EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, "<=", "F64", false); + Compare(ctx, inst, lhs, rhs, "<=", false); } void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, @@ -391,12 +391,12 @@ void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">=", "F", true); + Compare(ctx, inst, lhs, rhs, ">=", true); } void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">=", "F64", true); + Compare(ctx, inst, lhs, rhs, ">=", true); } void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, @@ -407,12 +407,12 @@ void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">=", "F", false); + Compare(ctx, inst, lhs, rhs, ">=", false); } void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - Compare(ctx, inst, lhs, rhs, ">=", "F64", false); + Compare(ctx, inst, lhs, rhs, ">=", false); } void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From b95716e5431e7ddb05239c31080c01aab24a13ac Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 20:55:06 -0400 Subject: [PATCH 584/770] glsl: Update phi node management --- .../backend/glsl/emit_glsl.cpp | 37 +++++++++++++------ .../glsl/emit_glsl_not_implemented.cpp | 24 +++++++++--- .../backend/glsl/reg_alloc.cpp | 3 +- .../backend/glsl/reg_alloc.h | 10 +++-- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index e5aaf81a7..a8e53cf66 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -98,18 +98,33 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } -void Precolor(EmitContext& ctx, const IR::Program& program) { +bool IsReference(IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::Reference; +} + +void PrecolorInst(IR::Inst& phi) { + // Insert phi moves before references to avoid overwriting other phis + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + IR::Block& phi_block{*phi.PhiBlock(i)}; + auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } + } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } +} + +void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { - ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); - const size_t num_args{phi.NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.PhiMove(phi, phi.Arg(i)); - } - // Add reference to the phi node on the phi predecessor to 
avoid overwritting it - for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); - } + PrecolorInst(phi); } } } @@ -158,7 +173,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& program, Bindings& bindings) { EmitContext ctx{program, bindings, profile}; - Precolor(ctx, program); + Precolor(program); EmitCode(ctx, program); return ctx.code; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index d67a1d81f..b37b3c76d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -19,8 +19,15 @@ static void NotImplemented() { throw NotImplementedException("GLSL instruction"); } -void EmitPhi(EmitContext& ctx, IR::Inst& inst) { - // NotImplemented(); +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.reg_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); + } } void EmitVoid(EmitContext& ctx) { @@ -31,11 +38,18 @@ void EmitReference(EmitContext&) { // NotImplemented(); } -void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { - if (phi == value) { +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); + } + const auto phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.reg_alloc.Consume(value)}; + if (phi_reg == val_reg) { return; } - ctx.Add("{}={};", ctx.reg_alloc.Consume(phi), ctx.reg_alloc.Consume(value)); + ctx.Add("{}={};", phi_reg, val_reg); } void EmitBranch(EmitContext& ctx, std::string_view label) { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 3de4c1172..c60a87d91 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -146,10 +146,11 @@ Id RegAlloc::Alloc() { } register_use[reg] = true; Id ret{}; - ret.index.Assign(static_cast(reg)); + ret.is_valid.Assign(1); ret.is_long.Assign(0); ret.is_spill.Assign(0); ret.is_condition_code.Assign(0); + ret.index.Assign(static_cast(reg)); return ret; } } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 9f2ff8718..419e1e761 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -33,10 +33,12 @@ enum class Type : u32 { struct Id { union { u32 raw; - BitField<0, 29, u32> index; - BitField<29, 1, u32> is_long; - BitField<30, 1, u32> is_spill; - BitField<31, 1, u32> is_condition_code; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; }; bool operator==(Id rhs) const noexcept { From 5399906c26292634ab3eec5fce88640092e9c4c2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 22:13:50 -0400 Subject: 
[PATCH 585/770] glsl: Track S32 atomics --- src/shader_recompiler/backend/glsl/emit_context.cpp | 9 ++++++--- .../ir_opt/collect_shader_info_pass.cpp | 10 ++++++++-- src/shader_recompiler/shader_info.h | 3 ++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6f769fa10..7b6c6d22b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -110,9 +110,12 @@ void EmitContext::DefineHelperFunctions() { code += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; } - // TODO: Track this usage - code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; - code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + if (info.uses_atomic_s32_min) { + code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + } + if (info.uses_atomic_s32_max) { + code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + } } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fb2031fc8..c22e5992a 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -687,9 +687,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorage32: case IR::Opcode::WriteStorage32: case IR::Opcode::StorageAtomicIAdd32: - case IR::Opcode::StorageAtomicSMin32: case IR::Opcode::StorageAtomicUMin32: - case IR::Opcode::StorageAtomicSMax32: case IR::Opcode::StorageAtomicUMax32: case IR::Opcode::StorageAtomicAnd32: case IR::Opcode::StorageAtomicOr32: @@ -759,6 +757,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; + case IR::Opcode::StorageAtomicSMin32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::StorageAtomicSMax32: + info.used_storage_buffer_types |= IR::Type::U32; + info.uses_atomic_s32_max = true; + break; case IR::Opcode::GlobalAtomicIAdd64: case IR::Opcode::GlobalAtomicSMin64: case IR::Opcode::GlobalAtomicUMin64: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index be05eafcf..9f7f0b42c 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -189,8 +189,9 @@ struct Info { bool uses_atomic_f32x2_add{}; bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; + bool uses_atomic_s32_min{}; + bool uses_atomic_s32_max{}; bool uses_int64_bit_atomics{}; - bool uses_s32_atomics{}; bool uses_global_memory{}; bool uses_atomic_image_u32{}; From 3d086e6130a2c5f0546ccef3b234c65ef2f0c99b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 26 May 2021 00:16:20 -0400 Subject: [PATCH 586/770] glsl: Implement some attribute getters and setters --- .../backend/glsl/emit_context.cpp | 52 ++++- .../backend/glsl/emit_context.h | 9 + .../backend/glsl/emit_glsl_composite.cpp | 14 +- .../glsl/emit_glsl_context_get_set.cpp | 53 +++++ .../backend/glsl/emit_glsl_image.cpp | 205 ++++++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 9 +- .../glsl/emit_glsl_not_implemented.cpp | 178 +-------------- 
.../backend/glsl/reg_alloc.cpp | 6 + .../backend/glsl/reg_alloc.h | 2 + .../renderer_opengl/gl_shader_cache.cpp | 1 - 10 files changed, 337 insertions(+), 192 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7b6c6d22b..8e5983909 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -14,17 +14,63 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin : info{program.info}, profile{profile_} { std::string header = "#version 450\n"; SetupExtensions(header); - if (program.stage == Stage::Compute) { + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + // TODO: add only what's used by the shader + header += + "out gl_PerVertex {vec4 gl_Position;float gl_PointSize;float gl_ClipDistance[];};"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); + break; } code += header; - + const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; + if (generic.used) { + Add("layout(location={})in vec4 in_attr{};", index, index); + } + } + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { + continue; + } + Add("layout(location={})out vec4 frag_color{};", index, index); + } + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { + Add("layout(location={}) out vec4 out_attr{};", index, index); + } + } DefineConstantBuffers(); DefineStorageBuffers(); DefineHelperFunctions(); - code += "void main(){\n"; + Add("void main(){{"); + + if (stage == Stage::VertexA || stage == Stage::VertexB) { + Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); + } } void EmitContext::SetupExtensions(std::string& header) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 7f8857fa7..087eaff6a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -88,6 +88,11 @@ public: Add(format_str, inst, args...); } + template + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void Add(const char* format_str, Args&&... 
args) { code += fmt::format(format_str, std::forward<Args>(args)...); @@ -100,6 +105,10 @@ public: const Info& info; const Profile& profile; + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 8e7ad68bd..048b12f38 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -155,16 +155,14 @@ void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 7c9cadd7e..441818c0b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -51,4 +51,57 @@ void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.AddF32("{}=in_attr{}.{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.AddF32("{}=gl_Position.{};", inst, swizzle); + break; + default: + fmt::print("Get attribute {}", attr); + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast<u32>(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("out_attr{}.{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::PointSize: + ctx.Add("gl_PointSize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + break; + default: + fmt::print("Set attribute {}", attr); + throw NotImplementedException("Set attribute {}", attr); + } +} +
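The getter/setter pair above splits a generic attribute enumerant into a vec4 location and an xyzw component. A minimal standalone sketch of that mapping in plain C++ (GenericAttrExpr and is_output are illustrative names, not part of the patch):

#include <cstdint>
#include <string>

// Sketch only: mirrors the location/swizzle split used by
// EmitGetAttribute and EmitSetAttribute.
std::string GenericAttrExpr(std::uint32_t attr_index, bool is_output) {
    const std::uint32_t location{attr_index / 4}; // one vec4 per generic location
    const char swizzle{"xyzw"[attr_index % 4]};   // component within that vec4
    const char* prefix{is_output ? "out_attr" : "in_attr"};
    return prefix + std::to_string(location) + '.' + swizzle;
}

For example, GenericAttrExpr(6, false) yields "in_attr1.z", the same expression the emitter writes into the generated GLSL.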
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e69de29bb..109938e0e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2, + [[maybe_unused]] std::string_view dref) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod, + [[maybe_unused]] std::string_view ms) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const 
IR::Value& index, + [[maybe_unused]] std::string_view lod) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view derivates, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view lod_clamp) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view color) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void 
EmitBoundImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9f32070b0..49ab108bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -61,7 +61,8 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); @@ -180,8 +181,10 @@ void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::str std::string_view e3, std::string_view e4); void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b37b3c76d..14a2edd74 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -83,7 +83,7 @@ void EmitUnreachable(EmitContext& ctx) { } void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { - NotImplemented(); + ctx.Add("discard;"); } void EmitBarrier(EmitContext& ctx) { @@ -146,15 +146,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { NotImplemented(); } @@ -172,10 +163,6 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { NotImplemented(); } -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { - NotImplemented(); -} - void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -456,169 +443,6 
@@ void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offse NotImplemented(); } -void EmitBindlessImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { - NotImplemented(); -} - -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref) { - NotImplemented(); -} - -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms) { - NotImplemented(); -} - -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { - NotImplemented(); -} - -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const 
IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp) { - NotImplemented(); -} - -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view color) { - NotImplemented(); -} - void EmitBindlessImageAtomicIAdd32(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index c60a87d91..a080d5341 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -55,6 +55,8 @@ std::string MakeImm(const IR::Value& value) { return fmt::format("{}ul", value.U64()); case IR::Type::F64: return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -131,6 +133,10 @@ std::string RegAlloc::GetType(Type type, u32 index) { return "uvec2 "; case Type::F32x2: return "vec2 "; + case Type::U32x4: + return "uvec4 "; + case Type::F32x4: + return "vec4 "; case Type::Void: return ""; default: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 419e1e761..df067d3ad 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -27,6 +27,8 @@ enum class Type : u32 { F64, U32x2, F32x2, + U32x4, + F32x4, Void, }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e00d01e34..8a052851b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -440,7 +440,6 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } From d171083d53e106c8c5131522fdc81d51360c562d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 26 May 2021 21:18:17 -0400 Subject: [PATCH 587/770] glsl: textures wip --- .../backend/glsl/emit_context.cpp | 54 ++++++++++++-- .../backend/glsl/emit_context.h | 8 +++ .../backend/glsl/emit_glsl.cpp | 6 +- .../glsl/emit_glsl_bitwise_conversion.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 24 ++++++- .../backend/glsl/emit_glsl_instructions.h | 10 +-- .../glsl/emit_glsl_not_implemented.cpp | 27 ++++--- .../backend/glsl/reg_alloc.cpp | 72 +++++++------------ .../backend/glsl/reg_alloc.h | 11 +-- 9 files changed, 139 insertions(+), 75 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 8e5983909..de19e0fba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -8,9 +8,21 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +namespace { +std::string_view InterpDecorator(Interpolation interp) { + switch (interp) { + case Interpolation::Smooth: + 
return ""; + case Interpolation::Flat: + return "flat"; + case Interpolation::NoPerspective: + return "noperspective"; + } + throw InvalidArgument("Invalid interpolation {}", interp); +} +} // namespace -EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindings, - const Profile& profile_) +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) : info{program.info}, profile{profile_} { std::string header = "#version 450\n"; SetupExtensions(header); @@ -49,7 +61,8 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { - Add("layout(location={})in vec4 in_attr{};", index, index); + Add("layout(location={}) {} in vec4 in_attr{};", index, + InterpDecorator(generic.interpolation), index); } } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { @@ -66,6 +79,7 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin DefineConstantBuffers(); DefineStorageBuffers(); DefineHelperFunctions(); + SetupImages(bindings); Add("void main(){{"); if (stage == Stage::VertexA || stage == Stage::VertexB) { @@ -102,7 +116,7 @@ void EmitContext::DefineConstantBuffers() { } u32 binding{}; for (const auto& desc : info.constant_buffer_descriptors) { - Add("layout(std140,binding={}) uniform cbuf_{}{{vec4 cbuf{}[{}];}};", binding, binding, + Add("layout(std140,binding={}) uniform cbuf_{}{{vec4 cbuf{}[{}];}};", binding, desc.index, desc.index, 4 * 1024); ++binding; } @@ -164,4 +178,36 @@ void EmitContext::DefineHelperFunctions() { } } +void EmitContext::SetupImages(Bindings& bindings) { + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { + throw NotImplementedException("image_buffer_descriptors"); + image_buffer_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { + throw NotImplementedException("image_bindings"); + + image_bindings.push_back(bindings.image); + bindings.image += desc.count; + } + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { + throw NotImplementedException("TextureType::Buffer"); + + texture_buffer_bindings.push_back(bindings.texture); + bindings.texture += desc.count; + } + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { + texture_bindings.push_back(bindings.texture); + const auto indices{bindings.texture + desc.count}; + for (u32 index = bindings.texture; index < indices; ++index) { + Add("layout(binding={}) uniform sampler2D tex{};", bindings.texture, index); + } + bindings.texture += desc.count; + } +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 087eaff6a..1cd051b24 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -6,6 +6,8 @@ #include #include +#include + #include #include "shader_recompiler/backend/glsl/reg_alloc.h" @@ -109,11 +111,17 @@ public: std::string_view stage_name = "invalid"; std::string_view attrib_name = "invalid"; + std::vector texture_buffer_bindings; + std::vector 
image_buffer_bindings; + std::vector texture_bindings; + std::vector image_bindings; + private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); void DefineStorageBuffers(); void DefineHelperFunctions(); + void SetupImages(Bindings& bindings); }; } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index a8e53cf66..35dbe19ec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -113,7 +113,7 @@ void PrecolorInst(IR::Inst& phi) { if (arg.IsImmediate()) { ir.PhiMove(phi, arg); } else { - ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + ir.PhiMove(phi, IR::Value{&*arg.InstRecursive()}); } } for (size_t i = 0; i < num_args; ++i) { @@ -157,7 +157,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { break; case IR::AbstractSyntaxNode::Type::Return: case IR::AbstractSyntaxNode::Type::Unreachable: - ctx.Add("return;\n}}"); + ctx.Add("return;"); break; case IR::AbstractSyntaxNode::Type::Loop: case IR::AbstractSyntaxNode::Type::Repeat: @@ -175,6 +175,8 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& pr EmitContext ctx{program, bindings, profile}; Precolor(program); EmitCode(ctx, program); + ctx.code += "}"; + fmt::print("\n{}\n", ctx.code); return ctx.code; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 742f394d4..8512147e2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -15,7 +15,7 @@ static void Alias(IR::Inst& inst, const IR::Value& value) { if (value.IsImmediate()) { return; } - IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; + IR::Inst& value_inst{*value.InstRecursive()}; value_inst.DestructiveAddUsage(inst.UseCount()); value_inst.DestructiveRemoveUsage(); inst.SetDefinition(value_inst.Definition()); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 109938e0e..cc5afc048 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -6,17 +6,39 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +namespace { +std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { + if (info.type == TextureType::Buffer) { + throw NotImplementedException("TextureType::Buffer"); + } else { + return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); + } +} +} // namespace void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords, [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("Lod clamp 
samples"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("Offset"); + } + const auto texture{Texture(ctx, info, index)}; + ctx.AddF32x4("{}=texture({},{});", inst, texture, coords); } void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 49ab108bb..c9b53bae2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -89,11 +89,11 @@ void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx); void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); -void EmitUndefU1(EmitContext& ctx); -void EmitUndefU8(EmitContext& ctx); -void EmitUndefU16(EmitContext& ctx); -void EmitUndefU32(EmitContext& ctx); -void EmitUndefU64(EmitContext& ctx); +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 14a2edd74..42b1e8764 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -39,17 +39,26 @@ void EmitReference(EmitContext&) { } void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { - IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Arg(0).Type()}; if (!phi.Definition().is_valid) { // The phi node wasn't forward defined - ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); + ctx.Add("{};", ctx.reg_alloc.Define(phi, phi_type)); } const auto phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; const auto val_reg{ctx.reg_alloc.Consume(value)}; if (phi_reg == val_reg) { return; } - ctx.Add("{}={};", phi_reg, val_reg); + if (phi_type == value.Type()) { + ctx.Add("{}={}; // PHI MOVE", phi_reg, val_reg); + } else if (phi_type == IR::Type::U32 && value.Type() == IR::Type::F32) { + ctx.Add("{}=floatBitsToUint({}); // CAST PHI MOVE", phi_reg, val_reg); + } else { + throw NotImplementedException("{} to {} move", phi_type, value.Type()); + const auto cast{ctx.reg_alloc.GetGlslType(phi_type)}; + ctx.Add("{}={}({}); // CAST PHI MOVE", phi_reg, cast, val_reg); + } } void EmitBranch(EmitContext& ctx, std::string_view label) { @@ -235,23 +244,23 @@ void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_ NotImplemented(); } -void EmitUndefU1(EmitContext& ctx) { +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitUndefU8(EmitContext& ctx) { +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitUndefU16(EmitContext& ctx) { +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitUndefU32(EmitContext& ctx) { - NotImplemented(); +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); } 
-void EmitUndefU64(EmitContext& ctx) { +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index a080d5341..06f1965b5 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -71,26 +71,17 @@ std::string RegAlloc::Define(IR::Inst& inst) { std::string RegAlloc::Define(IR::Inst& inst, Type type) { const Id id{Alloc()}; - const auto type_str{GetType(type, id.index)}; + std::string type_str = ""; + if (!register_defined[id.index]) { + register_defined[id.index] = true; + type_str = GetGlslType(type); + } inst.SetDefinition(id); return type_str + Representation(id); } std::string RegAlloc::Define(IR::Inst& inst, IR::Type type) { - switch (type) { - case IR::Type::U1: - return Define(inst, Type::U1); - case IR::Type::U32: - return Define(inst, Type::U32); - case IR::Type::F32: - return Define(inst, Type::F32); - case IR::Type::U64: - return Define(inst, Type::U64); - case IR::Type::F64: - return Define(inst, Type::F64); - default: - throw NotImplementedException("IR type {}", type); - } + return Define(inst, RegType(type)); } std::string RegAlloc::Consume(const IR::Value& value) { @@ -107,11 +98,24 @@ std::string RegAlloc::Consume(IR::Inst& inst) { return Representation(inst.Definition()); } -std::string RegAlloc::GetType(Type type, u32 index) { - if (register_defined[index]) { - return ""; +Type RegAlloc::RegType(IR::Type type) { + switch (type) { + case IR::Type::U1: + return Type::U1; + case IR::Type::U32: + return Type::U32; + case IR::Type::F32: + return Type::F32; + case IR::Type::U64: + return Type::U64; + case IR::Type::F64: + return Type::F64; + default: + throw NotImplementedException("IR type {}", type); } - register_defined[index] = true; +} + +std::string RegAlloc::GetGlslType(Type type) { switch (type) { case Type::U1: return "bool "; @@ -144,6 +148,10 @@ std::string RegAlloc::GetType(Type type, u32 index) { } } +std::string RegAlloc::GetGlslType(IR::Type type) { + return GetGlslType(RegType(type)); +} + Id RegAlloc::Alloc() { if (num_used_registers < NUM_REGS) { for (size_t reg = 0; reg < NUM_REGS; ++reg) { @@ -170,30 +178,4 @@ void RegAlloc::Free(Id id) { register_use[id.index] = false; } -/*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { - switch (inst.GetOpcode()) { - case IR::Opcode::Identity: - case IR::Opcode::BitCastU16F16: - case IR::Opcode::BitCastU32F32: - case IR::Opcode::BitCastU64F64: - case IR::Opcode::BitCastF16U16: - case IR::Opcode::BitCastF32U32: - case IR::Opcode::BitCastF64U64: - return true; - default: - return false; - } -} - -/*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { - IR::Inst* it{&inst}; - while (IsAliased(*it)) { - const IR::Value arg{it->Arg(0)}; - if (arg.IsImmediate()) { - break; - } - it = arg.InstRecursive(); - } - return *it; -} } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index df067d3ad..419d0bde0 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -59,20 +59,15 @@ public: std::string Define(IR::Inst& inst, IR::Type type); std::string Consume(const IR::Value& value); - - /// Returns true if the instruction is expected to be aliased to another - static bool IsAliased(const IR::Inst& inst); - - /// Returns the underlying value out of an alias sequence - static 
IR::Inst& AliasInst(IR::Inst& inst); + std::string GetGlslType(Type type); + std::string GetGlslType(IR::Type type); private: static constexpr size_t NUM_REGS = 4096; static constexpr size_t NUM_ELEMENTS = 4; std::string Consume(IR::Inst& inst); - std::string GetType(Type type, u32 index); - + Type RegType(IR::Type type); Id Alloc(); void Free(Id id); From ed14d31f663e126a8f9fe0ea8abff8e27c46248b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 27 May 2021 00:26:16 -0400 Subject: [PATCH 588/770] glsl: Fix non-immediate buffer access and many other misc implementations --- .../backend/glsl/emit_context.cpp | 1 + .../backend/glsl/emit_context.h | 15 ++++ .../backend/glsl/emit_glsl.cpp | 5 ++ .../backend/glsl/emit_glsl_composite.cpp | 48 +++++-------- .../glsl/emit_glsl_context_get_set.cpp | 68 ++++++++++++++++--- .../backend/glsl/emit_glsl_convert.cpp | 2 +- .../backend/glsl/emit_glsl_floating_point.cpp | 2 +- .../backend/glsl/emit_glsl_instructions.h | 28 ++++---- .../backend/glsl/emit_glsl_memory.cpp | 18 +++-- .../glsl/emit_glsl_not_implemented.cpp | 12 +--- .../backend/glsl/reg_alloc.cpp | 4 ++ .../backend/glsl/reg_alloc.h | 2 + 12 files changed, 133 insertions(+), 72 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index de19e0fba..2bf0def82 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -88,6 +88,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } void EmitContext::SetupExtensions(std::string& header) { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 1cd051b24..66f70d355 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -90,6 +90,21 @@ public: Add(format_str, inst, args...); } + template + void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... 
args) { Add(format_str, inst, args...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 35dbe19ec..644da43f4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -160,8 +160,13 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("return;"); break; case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("do{{"); + break; case IR::AbstractSyntaxNode::Type::Repeat: + ctx.Add("}}while({});", ctx.reg_alloc.Consume(node.data.repeat.cond)); + break; default: + fmt::print("{}", node.type); throw NotImplementedException("{}", node.type); break; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 048b12f38..aa966a304 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -17,19 +17,14 @@ void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_v ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); } -void EmitCompositeConstructU32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2, - [[maybe_unused]] std::string_view e3) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); } -void EmitCompositeConstructU32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2, - [[maybe_unused]] std::string_view e3, - [[maybe_unused]] std::string_view e4) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); } void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, @@ -37,16 +32,14 @@ void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractU32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractU32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, @@ -135,19 +128,14 @@ void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_v ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); } -void EmitCompositeConstructF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2, - [[maybe_unused]] std::string_view e3) { - throw NotImplementedException("GLSL Instruction"); +void 
EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3) { + ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); } -void EmitCompositeConstructF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view e1, - [[maybe_unused]] std::string_view e2, - [[maybe_unused]] std::string_view e3, - [[maybe_unused]] std::string_view e4) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4) { + ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); } void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 441818c0b..796f01883 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -10,8 +10,18 @@ #include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { +namespace { static constexpr std::string_view SWIZZLE{"xyzw"}; +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return SWIZZLE[CbufIndex(offset)]; +} +} // namespace + void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); @@ -34,22 +44,42 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto u32_offset{offset.U32()}; - const auto index{(u32_offset / 4) % 4}; - ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}].{});", inst, binding.U32(), u32_offset / 16, - SWIZZLE[index]); + if (offset.IsImmediate()) { + ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}].{});", inst, binding.U32(), offset.U32() / 16, + OffsetSwizzle(offset.U32())); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}/16][({}/4)%4]);", inst, binding.U32(), offset_var, + offset_var); + } } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto u32_offset{offset.U32()}; - const auto index{(u32_offset / 4) % 4}; - ctx.AddF32("{}=cbuf{}[{}].{};", inst, binding.U32(), u32_offset / 16, SWIZZLE[index]); + if (offset.IsImmediate()) { + ctx.AddF32("{}=cbuf{}[{}].{};", inst, binding.U32(), offset.U32() / 16, + OffsetSwizzle(offset.U32())); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddF32("{}=cbuf{}[{}/16][({}/4)%4];", inst, binding.U32(), offset_var, offset_var); + } } -void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + if (offset.IsImmediate()) { + const auto u32_offset{offset.U32()}; + const auto index{(u32_offset / 4) % 4}; + ctx.AddU32x2("{}=uvec2(floatBitsToUint(cbuf{}[{}].{}),floatBitsToUint(cbuf{}[{}].{}));", + inst, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + binding.U32(), (offset.U32() + 1) / 16, OffsetSwizzle(offset.U32() + 1)); + 
} else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2(floatBitsToUint(cbuf{}[{}/16][({}/" + "4)%4]),floatBitsToUint(cbuf{}[({}+1)/16][(({}+1)/4)%4]));", + inst, binding.U32(), offset_var, offset_var, binding.U32(), offset_var, + offset_var); + } } void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, @@ -66,7 +96,23 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - ctx.AddF32("{}=gl_Position.{};", inst, swizzle); + switch (ctx.stage) { + case Stage::VertexA: + case Stage::VertexB: + ctx.AddF32("{}=gl_Position.{};", inst, swizzle); + break; + case Stage::Fragment: + ctx.AddF32("{}=gl_FragCoord.{};", inst, swizzle); + break; + default: + throw NotImplementedException("Get Position for stage {}", ctx.stage); + } + break; + case IR::Attribute::InstanceId: + ctx.AddS32("{}=gl_InstanceID;", inst, ctx.attrib_name); + break; + case IR::Attribute::VertexId: + ctx.AddS32("{}=gl_VertexID;", inst, ctx.attrib_name); break; default: fmt::print("Get attribute {}", attr); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 0f95d4465..866bcfc4d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -197,7 +197,7 @@ void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddF32("{}=float({});", inst, value); } void EmitConvertF32U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index f3d1d1af0..33aba9fef 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -133,7 +133,7 @@ void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddF32("{}=1/sqrt({});", inst, value); } void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index c9b53bae2..295b7a8c4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -60,7 +60,8 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, @@ -116,7 +117,8 @@ void
EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindin const IR::Value& offset); void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value); void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, @@ -145,14 +147,16 @@ void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_v void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2); -void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); +void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, @@ -175,10 +179,10 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std: u32 index); void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2); -void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3); -void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, - std::string_view e3, std::string_view e4); +void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, + std::string_view e2, std::string_view e3, std::string_view e4); void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 8994c02a2..32cee7d3e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -34,19 +34,23 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, void 
EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - ctx.AddU32("{}=ssbo{}[{}];", inst, binding.U32(), offset.U32()); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=ssbo{}[{}];", inst, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - ctx.AddU32x2("{}=uvec2(ssbo{}[{}],ssbo{}[{}]);", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32x2("{}=uvec2(ssbo{}[{}],ssbo{}[{}+1]);", inst, binding.U32(), offset_var, + binding.U32(), offset_var); } -void EmitLoadStorage128([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); +void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32x4("{}=uvec4(ssbo{}[{}],ssbo{}[{}+1],ssbo{}[{}+2],ssbo{}[{}+3]);", inst, + binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var, + binding.U32(), offset_var); } void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 42b1e8764..761145a5d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -50,15 +50,7 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& if (phi_reg == val_reg) { return; } - if (phi_type == value.Type()) { - ctx.Add("{}={}; // PHI MOVE", phi_reg, val_reg); - } else if (phi_type == IR::Type::U32 && value.Type() == IR::Type::F32) { - ctx.Add("{}=floatBitsToUint({}); // CAST PHI MOVE", phi_reg, val_reg); - } else { - throw NotImplementedException("{} to {} move", phi_type, value.Type()); - const auto cast{ctx.reg_alloc.GetGlslType(phi_type)}; - ctx.Add("{}={}({}); // CAST PHI MOVE", phi_reg, cast, val_reg); - } + ctx.Add("{}={};", phi_reg, val_reg); } void EmitBranch(EmitContext& ctx, std::string_view label) { @@ -245,7 +237,7 @@ void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_ } void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); + ctx.AddU1("{}=false;", inst); } void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 06f1965b5..a987ce543 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -137,6 +137,10 @@ std::string RegAlloc::GetGlslType(Type type) { return "uvec2 "; case Type::F32x2: return "vec2 "; + case Type::U32x3: + return "uvec3 "; + case Type::F32x3: + return "vec3 "; case Type::U32x4: return "uvec4 "; case Type::F32x4: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 419d0bde0..2dc506c58 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -27,6 +27,8 @@ enum class Type : u32 { F64, U32x2, F32x2, + U32x3, + F32x3, U32x4, F32x4, Void, From a752ec88d06c6bcfb13605447a164c6b6915ed6e Mon Sep 17 00:00:00 2001 From: ameerj 
<52414509+ameerj@users.noreply.github.com> Date: Thu, 27 May 2021 20:31:03 -0400 Subject: [PATCH 589/770] glsl: Implement derivatives and YDirection plus some other misc additions/changes --- .../backend/glsl/emit_context.cpp | 7 +- .../backend/glsl/emit_context.h | 7 +- .../backend/glsl/emit_glsl_composite.cpp | 72 +++++++------------ .../backend/glsl/emit_glsl_floating_point.cpp | 6 +- .../backend/glsl/emit_glsl_image.cpp | 3 + .../backend/glsl/emit_glsl_instructions.h | 12 ++-- .../glsl/emit_glsl_not_implemented.cpp | 26 +------ .../backend/glsl/emit_glsl_warp.cpp | 35 +++++++++ 8 files changed, 87 insertions(+), 81 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 2bf0def82..0ddc0443b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -22,9 +22,10 @@ std::string_view InterpDecorator(Interpolation interp) { } } // namespace -EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) - : info{program.info}, profile{profile_} { - std::string header = "#version 450\n"; +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + std::string header = ""; SetupExtensions(header); stage = program.stage; switch (program.stage) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 66f70d355..07dad6e57 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -16,6 +16,7 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Backend { @@ -31,7 +32,8 @@ namespace Shader::Backend::GLSL { class EmitContext { public: - explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); template void Add(const char* format_str, IR::Inst& inst, Args&&...
args) { @@ -121,6 +123,7 @@ public: RegAlloc reg_alloc; const Info& info; const Profile& profile; + const RuntimeInfo& runtime_info; Stage stage{}; std::string_view stage_name = "invalid"; @@ -131,6 +134,8 @@ public: std::vector texture_bindings; std::vector image_bindings; + bool uses_y_direction{}; + private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index aa966a304..22e489aa9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -42,25 +42,19 @@ void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, @@ -153,25 +147,19 @@ void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + 
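// Note: a sketch of the approach assumed for these composite inserts: the consumed composite's register is written in place, one component per call, rather than constructing a new vector, so no separate result register is defined for the instruction. +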
ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { @@ -198,24 +186,18 @@ void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { throw NotImplementedException("GLSL Instruction"); } -void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } -void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] std::string_view object, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 33aba9fef..6a9b95eca 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -124,16 +124,16 @@ void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { } void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=1/{};", inst, value); + ctx.AddF32("{}=(1.0f)/{};", inst, value); } void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF64("{}=1/{};", inst, value); + ctx.AddF64("{}=1.0/{};", inst, value); } void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=1/sqrt({});", inst, value); + ctx.AddF32("{}=(1.0f)/sqrt({});", inst, value); } void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cc5afc048..1a348b117 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -37,6 +37,9 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if (!offset.IsEmpty()) { throw NotImplementedException("Offset"); } + if (info.type != TextureType::Color2D) { + throw NotImplementedException("Texture type: {}", info.type.Value()); + } const auto texture{Texture(ctx, info, index)}; ctx.AddF32x4("{}=texture({},{});", inst, texture, coords); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 295b7a8c4..f08ed0ece 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -87,7 +87,7 @@ void EmitLocalInvocationId(EmitContext& ctx); void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); -void EmitYDirection(EmitContext& ctx); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); @@ -692,11 +692,11 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); -void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, std::string_view swizzle); -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 761145a5d..4dfc0c896 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -224,8 +224,9 @@ void EmitIsHelperInvocation(EmitContext& ctx) { NotImplemented(); } -void EmitYDirection(EmitContext& ctx) { - NotImplemented(); +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); } void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { @@ -650,25 +651,4 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val NotImplemented(); } -void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, - std::string_view swizzle) { - NotImplemented(); -} - -void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { - NotImplemented(); -} - -void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { - NotImplemented(); -} - -void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { - NotImplemented(); -} - -void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { - NotImplemented(); -} - } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index e69de29bb..187677878 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
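+// Note: a minimal sketch of the mapping assumed below: the IR derivative ops lower onto GLSL's dFdxFine/dFdyFine/dFdxCoarse/dFdyCoarse builtins (core since GLSL 4.50 via GL_ARB_derivative_control, fragment stage only), e.g. dFdxFine(v) computes the derivative of v within the 2x2 pixel quad.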
+ +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, + [[maybe_unused]] std::string_view swizzle) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); +} + +void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); +} + +void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); +} + +void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); +} +} // namespace Shader::Backend::GLSL From 6674637853009115833e132efce19c8e210f0471 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 27 May 2021 20:37:56 -0400 Subject: [PATCH 590/770] glsl: remove unused headers --- .../backend/glsl/emit_glsl.cpp | 17 ++++++++++------- .../backend/glsl/emit_glsl_atomic.cpp | 2 -- .../glsl/emit_glsl_bitwise_conversion.cpp | 2 -- .../backend/glsl/emit_glsl_composite.cpp | 2 -- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 -- .../backend/glsl/emit_glsl_convert.cpp | 2 -- .../backend/glsl/emit_glsl_floating_point.cpp | 2 -- .../backend/glsl/emit_glsl_image.cpp | 2 -- .../backend/glsl/emit_glsl_integer.cpp | 2 -- .../backend/glsl/emit_glsl_logical.cpp | 2 -- .../backend/glsl/emit_glsl_memory.cpp | 2 -- .../backend/glsl/emit_glsl_not_implemented.cpp | 2 -- .../backend/glsl/emit_glsl_select.cpp | 2 -- .../backend/glsl/emit_glsl_warp.cpp | 3 --- 14 files changed, 10 insertions(+), 34 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 644da43f4..56738bcc5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -4,17 +4,12 @@ #include #include -#include -#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/profile.h" -#pragma optimize("", off) namespace Shader::Backend::GLSL { namespace { template @@ -173,13 +168,21 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } +std::string GlslVersionSpecifier(const EmitContext& ctx) { + if (ctx.uses_y_direction) { + return " compatibility"; + } + return ""; +} } // Anonymous namespace -std::string EmitGLSL(const Profile& profile, const RuntimeInfo&, IR::Program& program, +std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings) { - EmitContext ctx{program, bindings, profile}; + EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(program); EmitCode(ctx, program); + const std::string version{fmt::format("#version 450{}\n", 
GlslVersionSpecifier(ctx))}; + ctx.code.insert(0, version); ctx.code += "}"; fmt::print("\n{}\n", ctx.code); return ctx.code; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 0c3af75f5..654196e55 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -6,9 +6,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 8512147e2..0fa99c526 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 22e489aa9..2cb935742 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { static constexpr std::string_view SWIZZLE{"xyzw"}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 796f01883..38ad9de35 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 866bcfc4d..c27965ef0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 6a9b95eca..a12bf98d7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -5,9 +5,7 @@ #include #include 
"shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 1a348b117..6b7f1eaad 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -5,10 +5,8 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 681bc1bfa..ce6e12623 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp index e4781c03c..aae32f7aa 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 32cee7d3e..6168c4e06 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 4dfc0c896..110d3322e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -5,8 +5,6 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" -#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" #ifdef _MSC_VER diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index a049e3dc9..1f2790b7d 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -5,9 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { void EmitSelectU1([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 187677878..aebdf8a3a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -5,10 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" -#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" -#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 2a713337165df4d5c4228458999a680e9ab65369 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 27 May 2021 22:28:33 -0400 Subject: [PATCH 591/770] glsl: Fix bindings, add some CC ops --- .../backend/glsl/emit_context.cpp | 57 +++++++++++-------- .../backend/glsl/emit_context.h | 5 +- .../glsl/emit_glsl_context_get_set.cpp | 33 ++++++----- .../backend/glsl/emit_glsl_image.cpp | 13 ++--- .../backend/glsl/emit_glsl_instructions.h | 4 +- .../backend/glsl/emit_glsl_integer.cpp | 29 +++++++++- .../glsl/emit_glsl_not_implemented.cpp | 2 +- .../backend/glsl/emit_glsl_select.cpp | 7 +-- 8 files changed, 92 insertions(+), 58 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0ddc0443b..7bd6b3605 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -20,6 +20,20 @@ std::string_view InterpDecorator(Interpolation interp) { } throw InvalidArgument("Invalid interpolation {}", interp); } + +std::string_view SamplerType(TextureType type) { + switch (type) { + case TextureType::Color2D: + return "sampler2D"; + case TextureType::ColorArray2D: + return "sampler2DArray"; + case TextureType::Color3D: + return "sampler3D"; + default: + throw NotImplementedException("Texture type: {}", type); + } +} + } // namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -31,27 +45,23 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile switch (program.stage) { case Stage::VertexA: case Stage::VertexB: - stage_name = "vertex"; - attrib_name = "vertex"; + stage_name = "vs"; // TODO: add only what's used by the shader header += "out gl_PerVertex {vec4 gl_Position;float gl_PointSize;float gl_ClipDistance[];};"; break; case Stage::TessellationControl: case Stage::TessellationEval: - stage_name = "primitive"; - attrib_name = "primitive"; + stage_name = "ts"; break; case Stage::Geometry: - stage_name = "primitive"; - attrib_name = "vertex"; + stage_name = "gs"; break; case Stage::Fragment: - stage_name = "fragment"; - attrib_name = "fragment"; + stage_name = "fs"; break; case Stage::Compute: - stage_name = "invocation"; + stage_name = "cs"; header += 
fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); @@ -77,12 +87,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("layout(location={}) out vec4 out_attr{};", index, index); } } - DefineConstantBuffers(); - DefineStorageBuffers(); - DefineHelperFunctions(); + DefineConstantBuffers(bindings); + DefineStorageBuffers(bindings); SetupImages(bindings); - Add("void main(){{"); + DefineHelperFunctions(); + Add("void main(){{"); if (stage == Stage::VertexA || stage == Stage::VertexB) { Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); } @@ -112,27 +122,25 @@ void EmitContext::SetupExtensions(std::string& header) { } } -void EmitContext::DefineConstantBuffers() { +void EmitContext::DefineConstantBuffers(Bindings& bindings) { if (info.constant_buffer_descriptors.empty()) { return; } - u32 binding{}; for (const auto& desc : info.constant_buffer_descriptors) { - Add("layout(std140,binding={}) uniform cbuf_{}{{vec4 cbuf{}[{}];}};", binding, desc.index, - desc.index, 4 * 1024); - ++binding; + Add("layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); + bindings.uniform_buffer += desc.count; } } -void EmitContext::DefineStorageBuffers() { +void EmitContext::DefineStorageBuffers(Bindings& bindings) { if (info.storage_buffers_descriptors.empty()) { return; } - u32 binding{}; for (const auto& desc : info.storage_buffers_descriptors) { - Add("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", binding, binding, - desc.cbuf_index, desc.count); - ++binding; + Add("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", bindings.storage_buffer, + bindings.storage_buffer, desc.cbuf_index); + bindings.storage_buffer += desc.count; } } @@ -203,10 +211,11 @@ void EmitContext::SetupImages(Bindings& bindings) { } texture_bindings.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { + const auto sampler_type{SamplerType(desc.type)}; texture_bindings.push_back(bindings.texture); const auto indices{bindings.texture + desc.count}; for (u32 index = bindings.texture; index < indices; ++index) { - Add("layout(binding={}) uniform sampler2D tex{};", bindings.texture, index); + Add("layout(binding={}) uniform {} tex{};", bindings.texture, sampler_type, index); } bindings.texture += desc.count; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 07dad6e57..9dff921db 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -127,7 +127,6 @@ public: Stage stage{}; std::string_view stage_name = "invalid"; - std::string_view attrib_name = "invalid"; std::vector texture_buffer_bindings; std::vector image_buffer_bindings; @@ -138,8 +137,8 @@ public: private: void SetupExtensions(std::string& header); - void DefineConstantBuffers(); - void DefineStorageBuffers(); + void DefineConstantBuffers(Bindings& bindings); + void DefineStorageBuffers(Bindings& bindings); void DefineHelperFunctions(); void SetupImages(Bindings& bindings); }; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 38ad9de35..67d308c49 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -43,23 +43,24 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}].{});", inst, binding.U32(), offset.U32() / 16, - OffsetSwizzle(offset.U32())); + ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), + offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32("{}=floatBitsToUint(cbuf{}[{}/16][({}/4)%4]);", inst, binding.U32(), offset_var, - offset_var); + ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); } } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddF32("{}=cbuf{}[{}].{};", inst, binding.U32(), offset.U32() / 16, + ctx.AddF32("{}={}_cbuf{}[{}].{};", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddF32("{}=cbuf{}[{}/16][({}/4)%4];", inst, binding.U32(), offset_var, offset_var); + ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(), + offset_var, offset_var); } } @@ -68,15 +69,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding if (offset.IsImmediate()) { const auto u32_offset{offset.U32()}; const auto index{(u32_offset / 4) % 4}; - ctx.AddU32x2("{}=uvec2(floatBitsToUint(cbuf{}[{}].{}),floatBitsToUint(cbuf{}[{}].{}));", - inst, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - binding.U32(), (offset.U32() + 1) / 16, OffsetSwizzle(offset.U32() + 1)); + ctx.AddU32x2( "{}=uvec2(floatBitsToUint({}_cbuf{}[{}].{}),floatBitsToUint({}_cbuf{}[{}].{}));", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.stage_name, binding.U32(), (offset.U32() + 1) / 16, + OffsetSwizzle(offset.U32() + 1)); } else { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(floatBitsToUint(cbuf{}[{}/16][({}/" - "4)%4]),floatBitsToUint(cbuf{}[({}+1)/16][(({}+1)/4)%4]));", - inst, binding.U32(), offset_var, offset_var, binding.U32(), offset_var, - offset_var); + ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" "4)%4]),floatBitsToUint({}_cbuf{}[({}+1)/16][(({}+1)/4)%4]));", + inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, + binding.U32(), offset_var, offset_var); } } @@ -107,10 +110,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } break; case IR::Attribute::InstanceId: - ctx.AddS32("{}=gl_InstanceID;", inst, ctx.attrib_name); + ctx.AddS32("{}=gl_InstanceID;", inst); break; case IR::Attribute::VertexId: - ctx.AddS32("{}=gl_VertexID;", inst, ctx.attrib_name); + ctx.AddS32("{}=gl_VertexID;", inst); break; default: fmt::print("Get attribute {}", attr); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 6b7f1eaad..c070fba0e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -32,14 +32,13 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if
(info.has_lod_clamp) { throw NotImplementedException("Lod clamp samples"); } - if (!offset.IsEmpty()) { - throw NotImplementedException("Offset"); - } - if (info.type != TextureType::Color2D) { - throw NotImplementedException("Texture type: {}", info.type.Value()); - } const auto texture{Texture(ctx, info, index)}; - ctx.AddF32x4("{}=texture({},{});", inst, texture, coords); + if (!offset.IsEmpty()) { + ctx.AddF32x4("{}=textureOffset({},{},ivec2({}));", inst, texture, coords, + ctx.reg_alloc.Consume(offset)); + } else { + ctx.AddF32x4("{}=texture({},{});", inst, texture, coords); + } } void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index f08ed0ece..b54fe684e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -207,8 +207,8 @@ void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std: u32 index); void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); -void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, - std::string_view false_value); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value); void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, std::string_view false_value); void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index ce6e12623..84e01b151 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -8,8 +8,30 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; + if (!zero) { + return; + } + ctx.AddU1("{}={}==0;", *zero, result); + zero->Invalidate(); +} + +void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { + IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; + if (!sign) { + return; + } + ctx.AddU1("{}=int({})<0;", *sign, result); + sign->Invalidate(); +} +} // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}={}+{};", inst, a, b); + const auto result{ctx.reg_alloc.Define(inst)}; + ctx.Add("uint {}={}+{};", result, a, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); } void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -98,7 +120,10 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view bas void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { - ctx.AddU32("{}=bitfieldExtract({}, int({}), int({}));", inst, base, offset, count); + const auto result{ctx.reg_alloc.Define(inst)}; + ctx.Add("uint {}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); } 
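+// Note: a sketch of the condition-code pattern used above: an op that can set CC defines its result register first, then any GetZeroFromOp/GetSignFromOp pseudo-ops are emitted as comparisons against that result and Invalidate()d so they are not lowered a second time.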
void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 110d3322e..3bac8899b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -29,7 +29,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& phi) { } void EmitVoid(EmitContext& ctx) { - NotImplemented(); + // NotImplemented(); } void EmitReference(EmitContext&) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 1f2790b7d..ad3713f2d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -8,10 +8,9 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { -void EmitSelectU1([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, - [[maybe_unused]] std::string_view true_value, - [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); +void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, + std::string_view true_value, std::string_view false_value) { + ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, From f6bbc76336942454a862280e5b2158ceab49a173 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 28 May 2021 13:54:09 -0400 Subject: [PATCH 592/770] glsl: WIP var forward declaration to fix Loop control flow. --- .../backend/glsl/emit_context.cpp | 64 ++++++++++--------- .../backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl.cpp | 23 +++---- .../backend/glsl/emit_glsl_integer.cpp | 8 +-- .../backend/glsl/reg_alloc.cpp | 4 +- .../backend/glsl/reg_alloc.h | 9 ++- 6 files changed, 60 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7bd6b3605..3530e89e5 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -29,7 +29,10 @@ std::string_view SamplerType(TextureType type) { return "sampler2DArray"; case TextureType::Color3D: return "sampler3D"; + case TextureType::ColorCube: + return "samplerCube"; default: + fmt::print("Texture type: {}", type); throw NotImplementedException("Texture type: {}", type); } } @@ -39,7 +42,6 @@ std::string_view SamplerType(TextureType type) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - std::string header = ""; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -67,24 +69,23 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } - code += header; const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { - Add("layout(location={}) {} in vec4 in_attr{};", index, - InterpDecorator(generic.interpolation), index); + header += fmt::format("layout(location={}) {} in vec4 in_attr{};", index, + InterpDecorator(generic.interpolation), index); } } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { if (!info.stores_frag_color[index]) { continue; } - Add("layout(location={})out vec4 frag_color{};", index, index); + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { - Add("layout(location={}) out vec4 out_attr{};", index, index); + header += fmt::format("layout(location={}) out vec4 out_attr{};", index, index); } } DefineConstantBuffers(bindings); @@ -92,14 +93,15 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupImages(bindings); DefineHelperFunctions(); - Add("void main(){{"); + header += "void main(){\n"; if (stage == Stage::VertexA || stage == Stage::VertexB) { Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); } } -void EmitContext::SetupExtensions(std::string& header) { +void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; + // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -127,7 +129,8 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { return; } for (const auto& desc : info.constant_buffer_descriptors) { - Add("layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", + header += fmt::format( + "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); bindings.uniform_buffer += desc.count; } @@ -138,53 +141,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { return; } for (const auto& desc : info.storage_buffers_descriptors) { - Add("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", bindings.storage_buffer, - bindings.storage_buffer, desc.cbuf_index); + header += fmt::format("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", + bindings.storage_buffer, bindings.storage_buffer, desc.cbuf_index); bindings.storage_buffer += desc.count; } } void EmitContext::DefineHelperFunctions() { if (info.uses_global_increment) { - code += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; + header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } if (info.uses_global_decrement) { - code += + header += "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } if (info.uses_atomic_f32_add) { - code += "uint CasFloatAdd(uint op_a,float op_b){return " - "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; + header += "uint CasFloatAdd(uint op_a,float op_b){return " + "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; } if (info.uses_atomic_f32x2_add) { - code += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; } if (info.uses_atomic_f32x2_min) { - code += "uint CasFloatMin32x2(uint 
op_a,vec2 op_b){return " - "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; + header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f32x2_max) { - code += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; + header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f16x2_add) { - code += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; + header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; } if (info.uses_atomic_f16x2_min) { - code += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; + header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; } if (info.uses_atomic_f16x2_max) { - code += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; + header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; } if (info.uses_atomic_s32_min) { - code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; + header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; } if (info.uses_atomic_s32_max) { - code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; + header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } } @@ -215,7 +218,8 @@ void EmitContext::SetupImages(Bindings& bindings) { texture_bindings.push_back(bindings.texture); const auto indices{bindings.texture + desc.count}; for (u32 index = bindings.texture; index < indices; ++index) { - Add("layout(binding={}) uniform {} tex{};", bindings.texture, sampler_type, index); + header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, + sampler_type, index); } bindings.texture += desc.count; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9dff921db..c9d629c40 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -119,6 +119,7 @@ public: code += '\n'; } + std::string header; std::string code; RegAlloc reg_alloc; const Info& info; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 56738bcc5..feb3ede1a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -83,6 +83,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { + // ctx.Add("/* {} */", inst->GetOpcode()); switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) 
\ case IR::Opcode::name: \ @@ -108,12 +109,9 @@ void PrecolorInst(IR::Inst& phi) { if (arg.IsImmediate()) { ir.PhiMove(phi, arg); } else { - ir.PhiMove(phi, IR::Value{&*arg.InstRecursive()}); + ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); } } - for (size_t i = 0; i < num_args; ++i) { - IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); - } } void Precolor(const IR::Program& program) { @@ -144,10 +142,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("break;"); } } else { - // TODO: implement this - ctx.Add("MOV.S.CC RC,{};" - "BRK (NE.x);", - 0); + ctx.Add("if({}){{break;}}", ctx.reg_alloc.Consume(node.data.break_node.cond)); } break; case IR::AbstractSyntaxNode::Type::Return: @@ -155,10 +150,12 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("return;"); break; case IR::AbstractSyntaxNode::Type::Loop: - ctx.Add("do{{"); + ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("}}while({});", ctx.reg_alloc.Consume(node.data.repeat.cond)); + ctx.Add("if({}){{", ctx.reg_alloc.Consume(node.data.repeat.cond)); + ctx.Add("continue;\n}}else{{"); + ctx.Add("break;\n}}\n}}"); break; default: fmt::print("{}", node.type); @@ -182,7 +179,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR Precolor(program); EmitCode(ctx, program); const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; - ctx.code.insert(0, version); + ctx.header.insert(0, version); + for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { + ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); + } + ctx.code.insert(0, ctx.header); ctx.code += "}"; fmt::print("\n{}\n", ctx.code); return ctx.code; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 84e01b151..6654fce81 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -28,8 +28,8 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { } } // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - const auto result{ctx.reg_alloc.Define(inst)}; - ctx.Add("uint {}={}+{};", result, a, b); + const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add("{}={}+{};", result, a, b); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); } @@ -120,8 +120,8 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view bas void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { - const auto result{ctx.reg_alloc.Define(inst)}; - ctx.Add("uint {}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); + const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add("{}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index a987ce543..b287b870a 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -74,7 +74,9 @@ std::string RegAlloc::Define(IR::Inst& inst, Type type) { std::string type_str = ""; if (!register_defined[id.index]) { register_defined[id.index] = true; - 
type_str = GetGlslType(type);
+        // type_str = GetGlslType(type);
+        reg_types.push_back(GetGlslType(type));
+        ++num_used_registers;
     }
     inst.SetDefinition(id);
     return type_str + Representation(id);
diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h
index 2dc506c58..6c293f9d1 100644
--- a/src/shader_recompiler/backend/glsl/reg_alloc.h
+++ b/src/shader_recompiler/backend/glsl/reg_alloc.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <bitset>
+#include <vector>
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
@@ -61,19 +62,21 @@ public:
     std::string Define(IR::Inst& inst, IR::Type type);
 
     std::string Consume(const IR::Value& value);
+    std::string Consume(IR::Inst& inst);
+
     std::string GetGlslType(Type type);
     std::string GetGlslType(IR::Type type);
 
+    size_t num_used_registers{};
+    std::vector<std::string> reg_types;
+
 private:
     static constexpr size_t NUM_REGS = 4096;
-    static constexpr size_t NUM_ELEMENTS = 4;
 
-    std::string Consume(IR::Inst& inst);
     Type RegType(IR::Type type);
     Id Alloc();
     void Free(Id id);
 
-    size_t num_used_registers{};
     std::bitset<NUM_REGS> register_use{};
     std::bitset<NUM_REGS> register_defined{};
 };

From 453cd25da57e4088826cb6df48b5b6856affe109 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Fri, 28 May 2021 13:55:07 -0400
Subject: [PATCH 593/770] glsl: SSBO access fixes and wip SampleExplicitLod
 implementation.

---
 .../backend/glsl/emit_glsl_image.cpp          | 15 ++++++++++++++-
 .../backend/glsl/emit_glsl_memory.cpp         |  8 +++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index c070fba0e..1a34fe9b3 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -46,7 +46,20 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse
                                 [[maybe_unused]] std::string_view coords,
                                 [[maybe_unused]] std::string_view lod_lc,
                                 [[maybe_unused]] const IR::Value& offset) {
-    throw NotImplementedException("GLSL Instruction");
+    const auto info{inst.Flags<IR::TextureInstInfo>()};
+    if (info.has_bias) {
+        throw NotImplementedException("Bias texture samples");
+    }
+    if (info.has_lod_clamp) {
+        throw NotImplementedException("Lod clamp samples");
+    }
+    const auto texture{Texture(ctx, info, index)};
+    if (!offset.IsEmpty()) {
+        ctx.AddF32x4("{}=textureLodOffset({},{},{},ivec2({}));", inst, texture, coords, lod_lc,
+                     ctx.reg_alloc.Consume(offset));
+    } else {
+        ctx.AddF32x4("{}=textureLod({},{},{});", inst, texture, coords, lod_lc);
+    }
 }
 
 void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
index 6168c4e06..708c9685b 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp
@@ -81,13 +81,15 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx,
 
 void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         std::string_view value) {
-    ctx.Add("ssbo{}[{}]={};", binding.U32(), offset.U32(), value);
+    const auto offset_var{ctx.reg_alloc.Consume(offset)};
+    ctx.Add("ssbo{}[{}]={};", binding.U32(), offset_var, value);
 }
 
 void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         std::string_view value) {
-    ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset.U32(), value);
-    ctx.Add("ssbo{}[{}]={}.y;", binding.U32(), offset.U32() + 1, value);
+    const auto offset_var{ctx.reg_alloc.Consume(offset)};
+    ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset_var, value);
+    ctx.Add("ssbo{}[{}+1]={}.y;", binding.U32(), offset_var, value);
 }
 
 void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx,
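
[Editor's note: the storage-write fix above replaces offset.U32() with reg_alloc.Consume(offset), so an offset held in a register is printed as a variable index instead of requiring an immediate value. A minimal sketch of the GLSL this emits, assuming binding 0 and placeholder register names R3 and R4 (illustrative, not taken from a real shader):

    ssbo0[8]=R4;       // immediate offset, as before
    ssbo0[R3]=R4;      // dynamic offset, now representable
    ssbo0[R3]=R4.x;    // 64-bit store, low word
    ssbo0[R3+1]=R4.y;  // 64-bit store, high word

The index is still a raw word index at this point; PATCH 596 below reworks these emitters to take a byte offset divided by 4.]
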
ctx.Add("ssbo{}[{}]={}.y;", binding.U32(), offset.U32() + 1, value); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[{}+1]={}.y;", binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, From 21797efa548598692a82a25959865236bd9e7116 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 28 May 2021 15:51:50 -0400 Subject: [PATCH 594/770] glsl: Implement IADD CC --- .../backend/glsl/emit_glsl.cpp | 2 ++ .../backend/glsl/emit_glsl_integer.cpp | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index feb3ede1a..992e4b82e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -183,6 +183,8 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); } + // TODO: track CC usage + ctx.header += "uint carry;"; ctx.code.insert(0, ctx.header); ctx.code += "}"; fmt::print("\n{}\n", ctx.code); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 6654fce81..6ff0f9248 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -29,9 +29,22 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { } // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; - ctx.Add("{}={}+{};", result, a, b); + if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); + ctx.AddU1("{}=carry!=0;", *carry, result); + carry->Invalidate(); + } else { + ctx.Add("{}={}+{};", result, a, b); + } SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + ctx.AddU1("{}=int({})>=0?int({})>int({}-{}):int({})Invalidate(); + } } void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -179,7 +192,7 @@ void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std:: } void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { - ctx.AddU1("{}=uint({}) Date: Fri, 28 May 2021 16:32:01 -0400 Subject: [PATCH 595/770] glsl: Fix integer conversions, implement clamp CC --- .../backend/glsl/emit_glsl_convert.cpp | 42 +++++++++---------- .../backend/glsl/emit_glsl_integer.cpp | 21 +++++++--- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index c27965ef0..fa1b02af1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -30,12 +30,12 @@ void EmitConvertS32F16([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertS32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddS32("{}=int({});", inst, value); + ctx.AddS32("{}=int(float({}));", inst, value); } void EmitConvertS32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddS32("{}=int({});", inst, value); + ctx.AddS32("{}=int(double({}));", inst, value); } void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -45,12 +45,12 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertS64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddS64("{}=int64_t(double({}));", inst, value); + ctx.AddS64("{}=int64_t(double(float({})));", inst, value); } void EmitConvertS64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddS64("{}=int64_t({});", inst, value); + ctx.AddS64("{}=int64_t(double({}));", inst, value); } void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -75,12 +75,12 @@ void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertU32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU32("{}=uint({});", inst, value); + ctx.AddU32("{}=uint(float({}));", inst, value); } void EmitConvertU32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU32("{}=uint({});", inst, value); + ctx.AddU32("{}=uint(double({}));", inst, value); } void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -90,22 +90,22 @@ void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertU64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU64("{}=uint64_t(double({}));", inst, value); + ctx.AddU64("{}=uint64_t(float({}));", inst, value); } void EmitConvertU64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU64("{}=uint64_t({});", inst, value); + ctx.AddU64("{}=uint64_t(double({}));", inst, value); } void EmitConvertU64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU64("{}=uint64_t({});", inst, value); + ctx.AddU64("{}=uint64_t(uint({}));", inst, value); } void EmitConvertU32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddU32("{}=uint({});", inst, value); + ctx.AddU32("{}=uint(uint64_t({}));", inst, value); } void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -120,12 +120,12 @@ void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(double({}));", inst, value); } void EmitConvertF64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - 
ctx.AddF64("{}=double({});", inst, value); + ctx.AddF64("{}=double(float({}));", inst, value); } void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -180,12 +180,12 @@ void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF32S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(int({}));", inst, value); } void EmitConvertF32S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(double(int64_t({})));", inst, value); } void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -195,17 +195,17 @@ void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(uint({}));", inst, value); } void EmitConvertF32U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(uint({}));", inst, value); } void EmitConvertF32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float({});", inst, value); + ctx.AddF32("{}=float(double(uint64_t({})));", inst, value); } void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -220,12 +220,12 @@ void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF64S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=double({});", inst, value); + ctx.AddF64("{}=double(int({}));", inst, value); } void EmitConvertF64S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=double({});", inst, value); + ctx.AddF64("{}=double(int64_t({}));", inst, value); } void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -240,12 +240,12 @@ void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=double({});", inst, value); + ctx.AddF64("{}=double(uint({}));", inst, value); } void EmitConvertF64U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF64("{}=double({});", inst, value); + ctx.AddF64("{}=double(uint64_t({}));", inst, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 6ff0f9248..34f880f1b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -72,11 +72,11 @@ void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { } void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=abs({});", inst, value); + ctx.AddU32("{}=abs(int({}));", 
inst, value); } void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=abs({});", inst, value); + ctx.AddU64("{}=abs(int64_t({}));", inst, value); } void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, @@ -128,13 +128,16 @@ void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { - ctx.AddU32("{}=bitfieldExtract(int({}), int({}), int({}));", inst, base, offset, count); + const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); } void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; - ctx.Add("{}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); + ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); } @@ -179,12 +182,18 @@ void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { - ctx.AddU32("{}=clamp(int({}), int({}), int({}));", inst, value, min, max); + const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add("{}=clamp(int({}), int({}), int({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); } void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { - ctx.AddU32("{}=clamp(uint({}), uint({}), uint({}));", inst, value, min, max); + const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add("{}=clamp(uint({}), uint({}), uint({}));", result, value, min, max); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); } void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { From 8ba814efb295f0b8494b3679c484c7ceab31c392 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 28 May 2021 21:24:52 -0400 Subject: [PATCH 596/770] glsl: Better Storage access and wip warps --- .../backend/glsl/emit_context.cpp | 4 ++ .../backend/glsl/emit_glsl.cpp | 5 +- .../glsl/emit_glsl_context_get_set.cpp | 16 +++-- .../backend/glsl/emit_glsl_instructions.h | 26 +++++--- .../backend/glsl/emit_glsl_integer.cpp | 4 +- .../backend/glsl/emit_glsl_memory.cpp | 60 +++++++++++++------ .../glsl/emit_glsl_not_implemented.cpp | 27 --------- .../backend/glsl/emit_glsl_warp.cpp | 53 ++++++++++++++++ 8 files changed, 133 insertions(+), 62 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 3530e89e5..db62ba73b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,6 +122,10 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; } } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension 
GL_ARB_shader_ballot : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 992e4b82e..800de58b7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -183,8 +183,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); } - // TODO: track CC usage + // TODO: track usage ctx.header += "uint carry;"; + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;\n"; + } ctx.code.insert(0, ctx.header); ctx.code += "}"; fmt::print("\n{}\n", ctx.code); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 67d308c49..2286177a7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -20,22 +20,26 @@ char OffsetSwizzle(u32 offset) { } } // namespace -void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } @@ -151,4 +155,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_vi ctx.Add("frag_color{}.{}={};", index, swizzle, value); } +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index b54fe684e..07408d9e9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -52,10 +52,14 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void 
EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -83,7 +87,7 @@ void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); void EmitWorkgroupId(EmitContext& ctx); -void EmitLocalInvocationId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); @@ -109,10 +113,14 @@ void EmitWriteGlobalS16(EmitContext& ctx); void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 34f880f1b..07e1a4b51 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -156,12 +156,12 @@ void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=findMSB(int({}));", inst, value); } void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=findMSB(uint({}));", inst, value); } void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 708c9685b..09ad35e44 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -8,45 +8,55 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { -void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int({}%4)*8,8);", inst, binding.U32(), offset_var, + offset_var); } -void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int({}%4)*8,8);", inst, binding.U32(), + offset_var, offset_var); } -void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, +void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, binding.U32(), + offset_var, offset_var); } -void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, +void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, binding.U32(), + offset_var, offset_var); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32("{}=ssbo{}[{}];", inst, binding.U32(), offset_var); + ctx.AddU32("{}=ssbo{}[{}/4];", inst, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(ssbo{}[{}],ssbo{}[{}+1]);", inst, binding.U32(), offset_var, + ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[{}/4+1]);", inst, binding.U32(), offset_var, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x4("{}=uvec4(ssbo{}[{}],ssbo{}[{}+1],ssbo{}[{}+2],ssbo{}[{}+3]);", inst, + ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[{}/4+1],ssbo{}[{}/4+2],ssbo{}[{}/4+3]);", inst, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var); } @@ -55,47 +65,59 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, 
[[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}]={};", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[{}/4]={};", binding.U32(), offset_var, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[{}+1]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+2]={}.z;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+3]={}.w;", binding.U32(), offset_var, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 3bac8899b..ec80f3cef 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -206,10 +206,6 @@ void EmitWorkgroupId(EmitContext& ctx) { NotImplemented(); } -void EmitLocalInvocationId(EmitContext& ctx) { - NotImplemented(); -} - void EmitInvocationId(EmitContext& ctx) { NotImplemented(); } 
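
[Editor's note: the hunk below removes the warp-shuffle stubs from this file; their implementations land in emit_glsl_warp.cpp at the end of this patch. All four shuffle variants share one bounds check derived from the clamp value and the segmentation mask. A rough GLSL sketch of that check as it stands once PATCH 597 below reworks it to match the SPIR-V backend, with seg_mask, clamp_val and idx as placeholders for the consumed operands (gl_SubGroupInvocationARB comes from GL_ARB_shader_ballot, enabled in emit_context.cpp above):

    uint min_id=gl_SubGroupInvocationARB&seg_mask; // first lane of this segment
    uint max_id=min_id|(clamp_val&~seg_mask);      // highest lane the shuffle may read
    uint src_id=(idx&~seg_mask)|min_id;            // source lane for ShuffleIndex
    shfl_in_bounds=int(src_id)<=int(max_id);

PATCH 597 factors exactly this computation into the ComputeMinThreadId, ComputeMaxThreadId and GetMaxThreadId helpers.]
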
@@ -626,27 +622,4 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index aebdf8a3a..0a488188b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -8,6 +8,59 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} +} // namespace + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},gl_SubGroupInvocationARB-{}):" + "{};", + inst, value, index, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, 
[[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, [[maybe_unused]] std::string_view swizzle) { From b98de76ea8c8d159f1c44edac20c9a3390da1f40 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 28 May 2021 21:58:26 -0400 Subject: [PATCH 597/770] glsl: Rework Shuffle emit instructions to align with SPIR-V --- .../backend/glsl/emit_glsl_warp.cpp | 59 +++++++++++++------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 0a488188b..0b85aaba2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -14,51 +14,72 @@ void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { if (!in_bounds) { return; } - ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); in_bounds->Invalidate(); } + +std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { + return fmt::format("({}&{})", thread_id, segmentation_mask); +} + +std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, + std::string_view not_seg_mask) { + return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); +} + +std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, + std::string_view segmentation_mask) { + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); +} } // namespace void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { - ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" - "&~{}));", - index, segmentation_mask, clamp, segmentation_mask); + const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; + const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; + + const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; + const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); + ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); } void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { - ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" - "&~{}));", - index, segmentation_mask, clamp, segmentation_mask); + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},gl_SubGroupInvocationARB-{}):" - "{};", - inst, value, index, value); + ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); } void EmitShuffleDown(EmitContext& ctx, 
IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { - ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" - "&~{}));", - index, segmentation_mask, clamp, segmentation_mask); + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); + ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); } void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { - ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" - "&~{}));", - index, segmentation_mask, clamp, segmentation_mask); + const auto thread_id{"gl_SubGroupInvocationARB"}; + const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; + const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; + ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); + ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); } void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 55e0211a5e520482246273f2cc64388c4b4eff1c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 01:06:29 -0400 Subject: [PATCH 598/770] glsl: Implement TEX ImageSample functions --- .../backend/glsl/emit_context.cpp | 9 +++ .../backend/glsl/emit_glsl_convert.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 71 ++++++++++++++++--- 3 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index db62ba73b..eb1d8266f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -23,6 +23,10 @@ std::string_view InterpDecorator(Interpolation interp) { std::string_view SamplerType(TextureType type) { switch (type) { + case TextureType::Color1D: + return "sampler1D"; + case TextureType::ColorArray1D: + return "sampler1DArray"; case TextureType::Color2D: return "sampler2D"; case TextureType::ColorArray2D: @@ -31,6 +35,10 @@ std::string_view SamplerType(TextureType type) { return "sampler3D"; case TextureType::ColorCube: return "samplerCube"; + case TextureType::ColorArrayCube: + return "samplerCubeArray"; + case TextureType::Buffer: + return "samplerBuffer"; default: fmt::print("Texture type: {}", type); throw NotImplementedException("Texture type: {}", type); @@ -101,6 +109,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; + header += "#extension GL_ARB_sparse_texture2 : enable\n"; // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp 
b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index fa1b02af1..087eaea8f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -195,7 +195,7 @@ void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=float(uint({}));", inst, value); + ctx.AddF32("{}=float(uint({}&0xffff));", inst, value); } void EmitConvertF32U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 1a34fe9b3..71eb3ac2b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -10,7 +10,7 @@ namespace Shader::Backend::GLSL { namespace { -std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, [[maybe_unused]] const IR::Value& index) { if (info.type == TextureType::Buffer) { throw NotImplementedException("TextureType::Buffer"); @@ -18,6 +18,32 @@ std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); } } + +std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Offset type {}", info.type.Value()); + } +} + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} } // namespace void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -26,18 +52,30 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; - if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); - } if (info.has_lod_clamp) { throw NotImplementedException("Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? 
fmt::format(",{}", bias_lc) : ""}; + const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + if (!sparse_inst) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, + CastToIntVec(ctx.reg_alloc.Consume(offset), info), bias); + } else { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } + return; + } + // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { - ctx.AddF32x4("{}=textureOffset({},{},ivec2({}));", inst, texture, coords, - ctx.reg_alloc.Consume(offset)); + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", + *sparse_inst, texture, coords, CastToIntVec(ctx.reg_alloc.Consume(offset), info), + texel, bias); } else { - ctx.AddF32x4("{}=texture({},{});", inst, texture, coords); + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, + texture, coords, texel, bias); } } @@ -54,11 +92,24 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse throw NotImplementedException("Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + if (!sparse_inst) { + if (!offset.IsEmpty()) { + ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, + CastToIntVec(ctx.reg_alloc.Consume(offset), info)); + } else { + ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); + } + return; + } if (!offset.IsEmpty()) { - ctx.AddF32x4("{}=textureLodOffset({},{},{},ivec2({}));", inst, texture, coords, lod_lc, - ctx.reg_alloc.Consume(offset)); + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, + CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel); } else { - ctx.AddF32x4("{}=textureLod({},{},{});", inst, texture, coords, lod_lc); + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, + texture, coords, lod_lc, texel); } } From 7619b7d427437cb58df0f9fc57a7d6b3f5c45f9c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 01:53:32 -0400 Subject: [PATCH 599/770] glsl: Implement TEX depth functions --- .../backend/glsl/emit_context.cpp | 24 +++++++++++++++-- .../backend/glsl/emit_glsl_image.cpp | 26 +++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index eb1d8266f..94ba9af7c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -21,7 +21,26 @@ std::string_view InterpDecorator(Interpolation interp) { throw InvalidArgument("Invalid interpolation {}", interp); } -std::string_view SamplerType(TextureType type) { +std::string_view SamplerType(TextureType type, bool is_depth) { + if (is_depth) { + switch (type) { + case TextureType::Color1D: + return "sampler1DShadow"; + case TextureType::ColorArray1D: + return "sampler1DArrayShadow"; + case TextureType::Color2D: + return "sampler2DShadow"; + case TextureType::ColorArray2D: + return "sampler2DArrayShadow"; + case TextureType::ColorCube: + return "samplerCubeShadow"; + case TextureType::ColorArrayCube: + return "samplerCubeArrayShadow"; + default: + fmt::print("Texture type: {}", type); + throw 
NotImplementedException("Texture type: {}", type); + } + } switch (type) { case TextureType::Color1D: return "sampler1D"; @@ -110,6 +129,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; @@ -227,7 +247,7 @@ void EmitContext::SetupImages(Bindings& bindings) { } texture_bindings.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { - const auto sampler_type{SamplerType(desc.type)}; + const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; texture_bindings.push_back(bindings.texture); const auto indices{bindings.texture + desc.count}; for (u32 index = bindings.texture; index < indices; ++index) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 71eb3ac2b..4381ed351 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -120,7 +120,17 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view dref, [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("Lod clamp samples"); + } + const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; + const auto texture{Texture(ctx, info, index)}; + const auto vec_cast{info.type == TextureType::ColorArrayCube ? 
"vec4" : "vec3"}; + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, vec_cast, dref, coords, bias); } void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, @@ -130,7 +140,19 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view dref, [[maybe_unused]] std::string_view lod_lc, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},vec3({},{}),{});", inst, texture, coords, dref, lod_lc); + } } void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From c9a25855bc208c0bd878f430c8d9fa6e6df44e46 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 29 May 2021 02:09:29 -0400 Subject: [PATCH 600/770] shader_recompiler: GCC fixes --- src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | 1 + .../backend/glsl/emit_glsl_bitwise_conversion.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp | 1 + .../backend/glsl/emit_glsl_context_get_set.cpp | 3 +-- src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp | 1 + .../backend/glsl/emit_glsl_floating_point.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp | 1 + .../backend/glsl/emit_glsl_not_implemented.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_select.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 1 + src/shader_recompiler/backend/glsl/reg_alloc.cpp | 1 - 14 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 654196e55..ad2120670 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -6,6 +6,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 0fa99c526..3e8c648b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 2cb935742..0fd667c8f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -5,6 +5,7 @@ #include #include 
"shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 2286177a7..dc0e9ef95 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { @@ -71,8 +72,6 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { if (offset.IsImmediate()) { - const auto u32_offset{offset.U32()}; - const auto index{(u32_offset / 4) % 4}; ctx.AddU32x2( "{}=uvec2(floatBitsToUint({}_cbuf{}[{}].{}),floatBitsToUint({}_cbuf{}[{}].{}));", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 087eaea8f..2ecfc2993 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index a12bf98d7..5f9603602 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 4381ed351..6962f2b91 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 07e1a4b51..73ceb06d9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp index aae32f7aa..338ff4bd6 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 09ad35e44..0450a7c21 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index ec80f3cef..fd3140dd2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" #ifdef _MSC_VER diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index ad3713f2d..d3c8d330f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 0b85aaba2..1c212ec05 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index b287b870a..ecb550095 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -91,7 +91,6 @@ std::string RegAlloc::Consume(const IR::Value& value) { } std::string RegAlloc::Consume(IR::Inst& inst) { - const Id id{inst.Definition()}; inst.DestructiveRemoveUsage(); // TODO: reuse variables of same type if possible // if (!inst.HasUses()) { From 59a692e9edf385d56f84f38006cf15fff4372d6b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 14:10:24 -0400 Subject: [PATCH 601/770] glsl: Cleanup texture functions --- .../backend/glsl/emit_glsl_image.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 6962f2b91..68701ee52 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -26,8 +26,8 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info return fmt::format("int({})", value); case TextureType::ColorArray1D: case TextureType::Color2D: - return fmt::format("ivec2({})", value); case TextureType::ColorArray2D: + return fmt::format("ivec2({})", value); case TextureType::Color3D: case TextureType::ColorCube: return fmt::format("ivec3({})", value); @@ -65,7 +65,11 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, CastToIntVec(ctx.reg_alloc.Consume(offset), info), bias); } else { - ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); + } else { + ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); + } } return; } @@ -104,6 +108,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse } return; } + // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, @@ -121,17 +126,7 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view dref, [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { - const auto info{inst.Flags()}; - if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); - } - if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); - } - const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; - const auto texture{Texture(ctx, info, index)}; - const auto vec_cast{info.type == TextureType::ColorArrayCube ? 
"vec4" : "vec3"}; - ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, vec_cast, dref, coords, bias); + throw NotImplementedException("GLSL Instruction"); } void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, @@ -148,6 +143,9 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, if (info.has_lod_clamp) { throw NotImplementedException("Lod clamp samples"); } + if (!offset.IsEmpty()) { + throw NotImplementedException("textureLodOffset"); + } const auto texture{Texture(ctx, info, index)}; if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); From e4ba75570570007d4c85d6d28a4f890ce58b02e8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 14:21:25 -0400 Subject: [PATCH 602/770] glsl: Implement TEXS --- .../backend/glsl/emit_glsl_image.cpp | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 68701ee52..d721b018b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -38,6 +38,17 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info } } +std::string ShadowSamplerVecCast(TextureType type) { + switch (type) { + case TextureType::ColorArray2D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + return "vec4"; + default: + return "vec3"; + } +} + IR::Inst* PrepareSparse(IR::Inst& inst) { const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; if (sparse_inst) { @@ -126,7 +137,24 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view dref, [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("Lod clamp samples"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("textureLodOffset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto bias{info.has_bias ? 
fmt::format(",{}", bias_lc) : ""}; + const auto cast{ShadowSamplerVecCast(info.type)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } } void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, From 697eacd095f6568e43285499bba433a4eafe65d3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 15:03:28 -0400 Subject: [PATCH 603/770] glsl: Implement TLD instruction --- .../backend/glsl/emit_glsl_image.cpp | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index d721b018b..78e2d5bac 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -38,6 +38,24 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info } } +std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInfo& info) { + switch (info.type) { + case TextureType::Color1D: + return fmt::format("int({})", value); + case TextureType::ColorArray1D: + case TextureType::Color2D: + return fmt::format("ivec2({})", value); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorCube: + return fmt::format("ivec3({})", value); + case TextureType::ColorArrayCube: + return fmt::format("ivec4({})", value); + default: + throw NotImplementedException("Offset type {}", info.type.Value()); + } +} + std::string ShadowSamplerVecCast(TextureType type) { switch (type) { case TextureType::ColorArray2D: @@ -138,6 +156,10 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view bias_lc, [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("Sparse texture samples"); + } if (info.has_bias) { throw NotImplementedException("Bias texture samples"); } @@ -165,6 +187,10 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lod_lc, [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("Sparse texture samples"); + } if (info.has_bias) { throw NotImplementedException("Bias texture samples"); } @@ -204,7 +230,35 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst [[maybe_unused]] std::string_view coords, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod, [[maybe_unused]] std::string_view ms) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + if (info.has_bias) { + throw NotImplementedException("Bias texture samples"); + } + if (info.has_lod_clamp) { + throw NotImplementedException("Lod clamp samples"); + } + const auto texture{Texture(ctx, info, index)}; + const auto sparse_inst{PrepareSparse(inst)}; + const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + if (!sparse_inst) { + if (!offset.empty()) { + ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, + TexelFetchCastToInt(coords, info), lod, TexelFetchCastToInt(offset, info)); + } else { + 
ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, + TexelFetchCastToInt(coords, info), lod); + } + return; + } + // TODO: Query sparseTexels extension support + if (!offset.empty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, + CastToIntVec(offset, info), texel); + } else { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},{},{}));", *sparse_inst, + texture, CastToIntVec(coords, info), lod, texel); + } } void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 5fd92780b2d463fd4668472c41ef32ae4c15e9e6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 16:58:33 -0400 Subject: [PATCH 604/770] glsl: TLD4 implementation --- .../backend/glsl/emit_glsl_image.cpp | 91 ++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 78e2d5bac..e12d7b850 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -67,6 +67,23 @@ std::string ShadowSamplerVecCast(TextureType type) { } } +std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { + const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; + if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { + // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); + return ""; + } + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + throw LogicError("Invalid PTP arguments"); + } + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), + read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), + read(1, 3)); +} + IR::Inst* PrepareSparse(IR::Inst& inst) { const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; if (sparse_inst) { @@ -213,7 +230,45 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins [[maybe_unused]] std::string_view coords, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] const IR::Value& offset2) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + if (!offset2.IsEmpty()) { + ctx.Add("/*OFFSET 2 IS {}*/", ctx.reg_alloc.Consume(offset2)); + } + if (!sparse_inst) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, + info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, + CastToIntVec(ctx.reg_alloc.Consume(offset), info), info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, + info.gather_component); + return; + } + // TODO: Query sparseTexels extension support + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", + *sparse_inst, 
texture, coords, texel, info.gather_component); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), + CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel, info.gather_component); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},int({})));", + *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, + info.gather_component); } void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -222,7 +277,39 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] const IR::Value& offset2, [[maybe_unused]] std::string_view dref) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto sparse_inst{PrepareSparse(inst)}; + if (!sparse_inst) { + if (offset.IsEmpty()) { + ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); + return; + } + if (offset2.IsEmpty()) { + ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, + CastToIntVec(ctx.reg_alloc.Consume(offset), info)); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); + return; + } + // TODO: Query sparseTexels extension support + if (offset.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, + texture, coords, dref, texel); + return; + } + if (offset2.IsEmpty()) { + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, + CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel); + return; + } + // PTP + const auto offsets{PtpOffsets(offset, offset2)}; + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},{}));", + *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); } void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 3047eb66889a9782fadfbe479c33e6a8bfc5bf53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 18:08:19 -0400 Subject: [PATCH 605/770] glsl: Implement TXQ and other misc changes --- .../backend/glsl/emit_glsl.cpp | 2 +- .../glsl/emit_glsl_context_get_set.cpp | 3 +++ .../backend/glsl/emit_glsl_image.cpp | 24 ++++++++++++++++++- .../glsl/emit_glsl_not_implemented.cpp | 2 +- .../backend/glsl/reg_alloc.cpp | 11 ++++++--- 5 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 800de58b7..8705daeee 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -190,7 +190,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR } ctx.code.insert(0, ctx.header); ctx.code += "}"; - fmt::print("\n{}\n", ctx.code); + // fmt::print("\n{}\n", ctx.code); return ctx.code; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 
dc0e9ef95..ab7628a5a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -118,6 +118,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::VertexId: ctx.AddS32("{}=gl_VertexID;", inst); break; + case IR::Attribute::FrontFace: + ctx.AddS32("{}=gl_FrontFacing?-1:0;", inst); + break; default: fmt::print("Get attribute {}", attr); throw NotImplementedException("Get attribute {}", attr); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e12d7b850..9213375b4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -351,7 +351,29 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view lod) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + switch (info.type) { + case TextureType::Color1D: + return ctx.AddU32x4( + "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + return ctx.AddU32x4( + "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, + texture, lod, texture); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + return ctx.AddU32x4( + "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, + lod, texture); + case TextureType::Buffer: + throw NotImplementedException("Texture buffers"); + } + throw LogicError("Unspecified image type {}", info.type.Value()); } void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index fd3140dd2..b8f95bd36 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -168,7 +168,7 @@ void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { } void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { - NotImplemented(); + ctx.Add("gl_FragDepth={};", value); } void EmitGetZFlag(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index ecb550095..b1de022d4 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -37,9 +37,14 @@ std::string FormatFloat(std::string_view value, IR::Type type) { return "uintBitsToFloat(0xff800000)"; } } - const bool needs_dot = value.find_first_of('.') == std::string_view::npos; - const bool needs_suffix = !value.ends_with('f'); - const auto suffix = type == IR::Type::F32 ? "f" : "lf"; + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? 
"float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); } From 005eecffcdcac64419d8441b3a724421b9b9816c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 18:47:17 -0400 Subject: [PATCH 606/770] glsl: Fix and implement rest of cbuf access --- .../glsl/emit_glsl_context_get_set.cpp | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index ab7628a5a..03caaacec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -24,25 +24,61 @@ char OffsetSwizzle(u32 offset) { void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); + if (offset.IsImmediate()) { + ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),8);", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + (offset.U32() % 4) * 8); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32( + "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", + inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + } } void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); + if (offset.IsImmediate()) { + ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),8);", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + (offset.U32() % 4) * 8); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32( + "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + } } void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); + if (offset.IsImmediate()) { + ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),16);", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ((offset.U32() / 2) % 2) * 16); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/" + "2)%2)*16),16);", + inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + } } void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL"); + if (offset.IsImmediate()) { + ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),16);", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, 
OffsetSwizzle(offset.U32()), + ((offset.U32() / 2) % 2) * 16); + } else { + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32( + "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);", + inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + } } void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -75,12 +111,12 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding ctx.AddU32x2( "{}=uvec2(floatBitsToUint({}_cbuf{}[{}].{}),floatBitsToUint({}_cbuf{}[{}].{}));", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - ctx.stage_name, binding.U32(), (offset.U32() + 1) / 16, - OffsetSwizzle(offset.U32() + 1)); + ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, + OffsetSwizzle(offset.U32() + 4)); } else { const auto offset_var{ctx.reg_alloc.Consume(offset)}; ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" - "4)%4]),floatBitsToUint({}_cbuf{}[({}+1)/16][(({}+1/4))%4]));", + "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, binding.U32(), offset_var, offset_var); } From 1542f31e7979a7bae465d299774268533a130f9b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 20:00:06 -0400 Subject: [PATCH 607/770] glsl: minor cleanup --- .../backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_image.cpp | 3 --- .../backend/glsl/emit_glsl_integer.cpp | 19 ++++++++++--------- .../backend/glsl/emit_glsl_memory.cpp | 16 ++++++++-------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index c9d629c40..7ae7c8766 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -135,6 +135,7 @@ public: std::vector image_bindings; bool uses_y_direction{}; + bool uses_cc_carry{}; private: void SetupExtensions(std::string& header); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 9213375b4..d1f7c5d91 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -234,9 +234,6 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!offset2.IsEmpty()) { - ctx.Add("/*OFFSET 2 IS {}*/", ctx.reg_alloc.Consume(offset2)); - } if (!sparse_inst) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 73ceb06d9..4a3d66c90 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -31,6 +31,7 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + ctx.uses_cc_carry = true; 
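+        // A sketch of the GLSL the two emit calls below produce when the
+        // carry flag is consumed ("R0".."R3" are illustrative register names;
+        // the "uint carry;" module-level declaration is emitted separately
+        // when uses_cc_carry is set):
+        //   R0=uaddCarry(R1,R2,carry);
+        //   R3=carry!=0;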
ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); ctx.AddU1("{}=carry!=0;", *carry, result); carry->Invalidate(); @@ -61,11 +62,11 @@ void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin } void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}={}*{};", inst, a, b); + ctx.AddU32("{}=uint({}*{});", inst, a, b); } void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=-({});", inst, value); + ctx.AddU32("{}=uint(-({}));", inst, value); } void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -124,7 +125,7 @@ void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std: void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view insert, std::string_view offset, std::string_view count) { - ctx.AddU32("{}=bitfieldInsert({}, {}, int({}), int({}));", inst, base, insert, offset, count); + ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); } void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, @@ -166,25 +167,25 @@ void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst } void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}=min(int({}), int({}));", inst, a, b); + ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); } void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}=min(uint({}), uint({}));", inst, a, b); + ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); } void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}=max(int({}), int({}));", inst, a, b); + ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); } void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}=max(uint({}), uint({}));", inst, a, b); + ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); } void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; - ctx.Add("{}=clamp(int({}), int({}), int({}));", result, value, min, max); + ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); } @@ -192,7 +193,7 @@ void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std: void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; - ctx.Add("{}=clamp(uint({}), uint({}), uint({}));", result, value, min, max); + ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 0450a7c21..c66b7b788 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -50,16 +50,16 @@ void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindin void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - 
ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[{}/4+1]);", inst, binding.U32(), offset_var, + ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[({}+4)/4]);", inst, binding.U32(), offset_var, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[{}/4+1],ssbo{}[{}/4+2],ssbo{}[{}/4+3]);", inst, - binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var, - binding.U32(), offset_var); + ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[({}+4)/4],ssbo{}[({}+8)/4],ssbo{}[({}+12)/4]);", + inst, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), + offset_var, binding.U32(), offset_var); } void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, @@ -108,7 +108,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, @@ -117,8 +117,8 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}/4)+2]={}.z;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}/4)+3]={}.w;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}+8)/4]={}.z;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}+12)/4]={}.w;", binding.U32(), offset_var, value); } } // namespace Shader::Backend::GLSL From 80eec858678abeec988ca6390cb2e0636cfcdc98 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 20:19:24 -0400 Subject: [PATCH 608/770] glsl: Fix GetAttribute return values fixes font rendering issues as these were used to index into the ssbos --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 12 +++++++----- .../backend/glsl/emit_glsl_context_get_set.cpp | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 8705daeee..f9ad71f92 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -83,7 +83,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - // ctx.Add("/* {} */", inst->GetOpcode()); + // ctx.Add("/* $ {} $ */", inst->GetOpcode()); switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) 
\ case IR::Opcode::name: \ @@ -183,11 +183,13 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); } - // TODO: track usage - ctx.header += "uint carry;"; - if (program.info.uses_subgroup_shuffles) { - ctx.header += "bool shfl_in_bounds;\n"; + if (ctx.uses_cc_carry) { + ctx.header += "uint carry;"; } + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;"; + } + ctx.header += "\n"; ctx.code.insert(0, ctx.header); ctx.code += "}"; // fmt::print("\n{}\n", ctx.code); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 03caaacec..c20747819 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -149,13 +149,13 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } break; case IR::Attribute::InstanceId: - ctx.AddS32("{}=gl_InstanceID;", inst); + ctx.AddF32("{}=intBitsToFloat(gl_InstanceID);", inst); break; case IR::Attribute::VertexId: - ctx.AddS32("{}=gl_VertexID;", inst); + ctx.AddF32("{}=intBitsToFloat(gl_VertexID);", inst); break; case IR::Attribute::FrontFace: - ctx.AddS32("{}=gl_FrontFacing?-1:0;", inst); + ctx.AddF32("{}=intBitsToFloat(gl_FrontFacing?-1:0);", inst); break; default: fmt::print("Get attribute {}", attr); From 7df0815117c6bdc70775d78b4625f44835ede54a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 21:12:52 -0400 Subject: [PATCH 609/770] glsl: Implement more instructions used by SMO --- .../backend/glsl/emit_glsl_context_get_set.cpp | 6 +++++- .../backend/glsl/emit_glsl_convert.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 4 +++- .../backend/glsl/emit_glsl_instructions.h | 6 +++--- .../backend/glsl/emit_glsl_not_implemented.cpp | 12 ++++++------ 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c20747819..4bb20b8fa 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -148,6 +148,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, throw NotImplementedException("Get Position for stage {}", ctx.stage); } break; + case IR::Attribute::PointSpriteS: + case IR::Attribute::PointSpriteT: + ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); + break; case IR::Attribute::InstanceId: ctx.AddF32("{}=intBitsToFloat(gl_InstanceID);", inst); break; @@ -174,7 +178,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } switch (attr) { case IR::Attribute::PointSize: - ctx.Add("gl_Pointsize={};", value); + ctx.Add("gl_PointSize={};", value); break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 2ecfc2993..85d07b4de 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -16,7 +16,7 @@ void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void 
EmitConvertS16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddS32("{}=int(float({}))&0xffff;", inst, value); } void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index d1f7c5d91..e63e3f2bd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -376,7 +376,9 @@ void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused] void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords) { - throw NotImplementedException("GLSL Instruction"); + const auto info{inst.Flags()}; + const auto texture{Texture(ctx, info, index)}; + return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); } void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 07408d9e9..a1806b7f5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -86,10 +86,10 @@ void EmitSetZFlag(EmitContext& ctx); void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); -void EmitWorkgroupId(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); -void EmitInvocationId(EmitContext& ctx); -void EmitSampleId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); +void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b8f95bd36..9af9ebeac 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -203,15 +203,15 @@ void EmitSetOFlag(EmitContext& ctx) { NotImplemented(); } -void EmitWorkgroupId(EmitContext& ctx) { +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitInvocationId(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSampleId(EmitContext& ctx) { +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } From 57d354b02ced63d7a0fcb01f1f674a910054cdd1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 21:12:52 -0400 Subject: [PATCH 610/770] glsl: Implement more instructions used by SMO --- src/shader_recompiler/backend/glsl/emit_context.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 94ba9af7c..fd0113c8d 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -96,7 +96,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } - const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { @@ -174,8 +173,9 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { return; } for (const auto& desc : info.storage_buffers_descriptors) { - header += fmt::format("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", - bindings.storage_buffer, bindings.storage_buffer, desc.cbuf_index); + header += + fmt::format("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", + bindings.storage_buffer, bindings.storage_buffer, bindings.storage_buffer); bindings.storage_buffer += desc.count; } } From 181a4ffdc477e56c82d5de17e242c64ee70275c2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 29 May 2021 23:31:58 -0400 Subject: [PATCH 611/770] glsl: Implement ST{LS} --- .../backend/glsl/emit_context.cpp | 5 -- .../backend/glsl/emit_glsl.cpp | 10 +++ .../glsl/emit_glsl_context_get_set.cpp | 8 ++ .../backend/glsl/emit_glsl_instructions.h | 16 ++-- .../glsl/emit_glsl_not_implemented.cpp | 56 ------------- .../backend/glsl/emit_glsl_shared_memory.cpp | 80 +++++++++++++++++++ 6 files changed, 106 insertions(+), 69 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index fd0113c8d..26969a26d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -118,11 +118,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineStorageBuffers(bindings); SetupImages(bindings); DefineHelperFunctions(); - - header += "void main(){\n"; - if (stage == Stage::VertexA || stage == Stage::VertexB) { - Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } } void EmitContext::SetupExtensions(std::string&) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index f9ad71f92..bac4b830d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -180,6 +180,16 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR EmitCode(ctx, program); const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", program.local_memory_size / 4); + } + if (program.shared_memory_size > 0) { + ctx.header += fmt::format("shared uint smem[{}];", program.shared_memory_size / 4); + } + ctx.header += "void main(){\n"; + if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { + ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; + } for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 4bb20b8fa..46ce413bf 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp 
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -201,4 +201,12 @@ void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); } +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { + ctx.AddU32("{}=lmem[{}];", inst, word_offset); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + ctx.Add("lmem[{}]={};", word_offset, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index a1806b7f5..72d97c7e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -92,7 +92,7 @@ void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); void EmitIsHelperInvocation(EmitContext& ctx); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); @@ -141,13 +141,13 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va std::string_view value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value); -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 9af9ebeac..b182298b0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -224,14 +224,6 @@ void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); } -void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { - NotImplemented(); -} - -void 
EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { - NotImplemented(); -} - void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { ctx.AddU1("{}=false;", inst); } @@ -308,54 +300,6 @@ void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_ NotImplemented(); } -void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { - NotImplemented(); -} - -void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - -void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index e69de29bb..8a4c69547 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -0,0 +1,80 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
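+
+// Shared memory is declared by the caller as a flat array of 32-bit words
+// ("shared uint smem[N];"), so the sub-word loads and stores below are
+// emulated with bitfieldExtract/bitfieldInsert on the containing word.
+// As a sketch, a 16-bit load from byte offset R0 emits GLSL along these
+// lines ("R0" and "R1" are illustrative register names):
+//   R1=bitfieldExtract(smem[R0/4],int((R0/2)%2)*16,16);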
+ +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset); +} + +void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset); +} + +void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddU32("{}=smem[{}/4];", inst, offset); +} + +void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset); +} + +void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] std::string_view offset) { + ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst, + offset, offset, offset, offset); +} + +void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value) { + ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value, + offset); +} + +void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value) { + ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value, + offset); +} + +void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value) { + ctx.Add("smem[{}/4]={};", offset, value); +} + +void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value) { + ctx.Add("smem[{}/4]={}.x;", offset, value); + ctx.Add("smem[({}+4)/4]={}.y;", offset, value); +} + +void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value) { + ctx.Add("smem[{}/4]={}.x;", offset, value); + ctx.Add("smem[({}+4)/4]={}.y;", offset, value); + ctx.Add("smem[({}+8)/4]={}.z;", offset, value); + ctx.Add("smem[({}+12)/4]={}.w;", offset, value); +} + +} // namespace Shader::Backend::GLSL From 770b754afde60658877c9063704d03ea385d40b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 00:08:39 -0400 Subject: [PATCH 612/770] glsl: Implement VOTE --- .../backend/glsl/emit_context.cpp | 1 + .../backend/glsl/emit_glsl_instructions.h | 20 +++---- .../glsl/emit_glsl_not_implemented.cpp | 40 
-------------- .../backend/glsl/emit_glsl_warp.cpp | 53 +++++++++++++++++++ 4 files changed, 64 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 26969a26d..5456d4e5b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,6 +148,7 @@ void EmitContext::SetupExtensions(std::string&) { if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n"; + header += "#extension GL_ARB_shader_group_vote : enable\n"; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 72d97c7e1..1e7247358 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -679,16 +679,16 @@ void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& ind std::string_view coords, std::string_view value); void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value); -void EmitLaneId(EmitContext& ctx); -void EmitVoteAll(EmitContext& ctx, std::string_view pred); -void EmitVoteAny(EmitContext& ctx, std::string_view pred); -void EmitVoteEqual(EmitContext& ctx, std::string_view pred); -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); -void EmitSubgroupEqMask(EmitContext& ctx); -void EmitSubgroupLtMask(EmitContext& ctx); -void EmitSubgroupLeMask(EmitContext& ctx); -void EmitSubgroupGtMask(EmitContext& ctx); -void EmitSubgroupGeMask(EmitContext& ctx); +void EmitLaneId(EmitContext& ctx, IR::Inst& inst); +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b182298b0..088c86f30 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -527,44 +527,4 @@ void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value NotImplemented(); } -void EmitLaneId(EmitContext& ctx) { - NotImplemented(); -} - -void EmitVoteAll(EmitContext& ctx, std::string_view pred) { - NotImplemented(); -} - -void EmitVoteAny(EmitContext& ctx, std::string_view pred) { - NotImplemented(); -} - -void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { - NotImplemented(); -} - -void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { - NotImplemented(); -} - -void 
EmitSubgroupEqMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupLtMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupLeMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupGtMask(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSubgroupGeMask(EmitContext& ctx) { - NotImplemented(); -} - } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 1c212ec05..e462c977c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -36,6 +37,58 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, } } // namespace +void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + // TODO: + // if (ctx.profile.warp_size_potentially_larger_than_guest) { + // } +} + +void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + // TODO: + // if (ctx.profile.warp_size_potentially_larger_than_guest) { + // } +} + +void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + // TODO: + // if (ctx.profile.warp_size_potentially_larger_than_guest) { + // } +} + +void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + // TODO: + // if (ctx.profile.warp_size_potentially_larger_than_guest) { + // } +} + +void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uvec2(gl_SubGroupEqMaskARB).x;", inst); +} + +void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uvec2(gl_SubGroupLtMaskARB).x;", inst); +} + +void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uvec2(gl_SubGroupLeMaskARB).x;", inst); +} + +void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uvec2(gl_SubGroupGtMaskARB).x;", inst); +} + +void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uvec2(gl_SubGroupGeMaskARB).x;", inst); +} + void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { From e35ffbbeb0f85f676416fcb8f0bb0207671f379d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 00:53:26 -0400 Subject: [PATCH 613/770] glsl: Implement VOTE for subgroup size potentially larger --- .../backend/glsl/emit_context.cpp | 12 ++++-- .../backend/glsl/emit_glsl_warp.cpp | 43 ++++++++++++------- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 
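// The warp_size_potentially_larger_than_guest paths below compare the predicate
// ballot against the mask of currently active invocations. A minimal C++ model
// of that mask logic, assuming one 32-bit half of the 64-bit ballotARB result
// (function names here are illustrative, not from the patch):
#include <cstdint>
// Every active lane voted true.
constexpr bool VoteAll(std::uint32_t ballot, std::uint32_t active_mask) {
    return (ballot & active_mask) == active_mask;
}
// At least one active lane voted true.
constexpr bool VoteAny(std::uint32_t ballot, std::uint32_t active_mask) {
    return (ballot & active_mask) != 0u;
}
// All active lanes agree: the lanes that voted false are either none or all of
// them (ballotARB only sets bits for active invocations, so ballot is a subset
// of active_mask).
constexpr bool VoteEqual(std::uint32_t ballot, std::uint32_t active_mask) {
    const std::uint32_t lanes_false = ballot ^ active_mask;
    return lanes_false == 0u || lanes_false == active_mask;
}
// In the emitted GLSL, active_mask is uvec2(ballotARB(true)) and ballot is
// uvec2(ballotARB(pred)), each invocation selecting its 32-bit half with the
// subgroup invocation index.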
5456d4e5b..c6325e55f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; - header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_EXT_texture_shadow_lod : enable\n"; - // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; + if (stage != Stage::Compute) { + // TODO: track this usage + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n"; header += "#extension GL_ARB_shader_group_vote : enable\n"; + header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; + if (!info.uses_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index e462c977c..8a018acb5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } } void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=anyInvocationARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } } void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } } void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view 
pred) { - ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); + } } void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 071133781..20ea42cff 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9b9402c29..ff0ff2b08 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -161,6 +165,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool warp_size_potentially_larger_than_guest{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8a052851b..cd11ff653 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .warp_size_potentially_larger_than_guest = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), .lower_left_origin_mode = true, .need_declared_frag_colors = true, From 68ef3803bfb53454b4d6f1cd163ccbfa29af38c8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 01:18:20 -0400 Subject: [PATCH 614/770] glsl: Use gl_SubGroupInvocationARB --- .../backend/glsl/emit_context.cpp | 1 - .../backend/glsl/emit_glsl_warp.cpp | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index c6325e55f..02d88b7ad 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -151,7 +151,6 @@ void EmitContext::SetupExtensions(std::string&) { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n"; header += "#extension GL_ARB_shader_group_vote : enable\n"; - header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; if (!info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } diff --git 
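// Rationale for the renames in the hunks that follow (inferred from the
// extension changes): gl_SubgroupInvocationID requires
// GL_KHR_shader_subgroup_basic, which the previous patch had to enable
// separately, while gl_SubGroupInvocationARB is provided by
// GL_ARB_shader_ballot, already required for ballotARB itself; switching to
// the ARB built-in drops the extra extension dependency.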
a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 8a018acb5..4286f29c7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -45,8 +45,8 @@ void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); } } @@ -55,8 +55,8 @@ void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=anyInvocationARB({});", inst, pred); } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); } } @@ -65,8 +65,8 @@ void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); } else { - const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; - const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; const auto value{fmt::format("({}^{})", ballot, active_mask)}; ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); } @@ -76,7 +76,7 @@ void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) if (!ctx.profile.warp_size_potentially_larger_than_guest) { ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); } else { - ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); } } From 9ccbd749914a3371893ee3d6c1bdcb50c7f777ab Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 14:31:59 -0400 Subject: [PATCH 615/770] glsl: Fix ATOM and implement ATOMS --- .../backend/glsl/emit_context.cpp | 4 +- .../backend/glsl/emit_glsl_atomic.cpp | 156 +++++++++++++----- .../backend/glsl/emit_glsl_instructions.h | 24 +-- .../glsl/emit_glsl_not_implemented.cpp | 60 ------- .../ir_opt/collect_shader_info_pass.cpp | 6 + 5 files changed, 136 insertions(+), 114 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 02d88b7ad..b9594de40 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -182,10 +182,10 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) 
{ } void EmitContext::DefineHelperFunctions() { - if (info.uses_global_increment) { + if (info.uses_global_increment || info.uses_shared_increment) { header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } - if (info.uses_global_decrement) { + if (info.uses_global_decrement || info.uses_shared_decrement) { header += "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index ad2120670..16791be84 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -11,31 +11,103 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view cas_loop{R"(uint {}; +static constexpr std::string_view cas_loop{R"({}; for (;;){{ uint old_value={}; {}=atomicCompSwap({},old_value,{}({},{})); if ({}==old_value){{break;}} }})"}; -void CasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset, std::string_view value, std::string_view function) { - const auto ret{ctx.reg_alloc.Define(inst)}; +void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view value, std::string_view function) { + const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; + const std::string smem{fmt::format("smem[{}/4]", offset)}; + ctx.Add(cas_loop.data(), ret, smem, ret, smem, function, smem, value, ret); +} + +void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); } -void CasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - const IR::Value& offset, std::string_view value, std::string_view function) { +void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, + std::string_view function) { const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - const std::string u32_value{fmt::format("floatBitsToUint({})", value)}; - const auto ret{ctx.reg_alloc.Define(inst)}; - const auto ret_32{ret + "_u32"}; - ctx.Add(cas_loop.data(), ret_32, ssbo, ret_32, ssbo, function, ssbo, value, ret_32); - ctx.Add("float {}=uintBitsToFloat({});", ret, ret_32); + const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; + ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); +} +} // namespace + +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value); } -} // namespace +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value); +} + +void 
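// For ops with no native GLSL atomic, the cas_loop template above expands to an
// atomicCompSwap retry loop. For a shared-memory op at byte offset off it emits,
// roughly (ret is the variable the allocator binds to the IR instruction):
//
//   uint ret;
//   for (;;) {
//       uint old_value = smem[off/4];
//       ret = atomicCompSwap(smem[off/4], old_value, CasOp(smem[off/4], value));
//       if (ret == old_value) { break; }
//   }
//
// atomicCompSwap returns the prior memory contents, so the loop exits exactly
// when no other invocation raced the update. CasOp is one of the helpers from
// DefineHelperFunctions, e.g. CasIncrement, which implements the wrapping
// increment semantics of the guest atomic INC operation.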
EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + const std::string u32_value{fmt::format("uint({})", value)}; + SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + const auto ret{ctx.reg_alloc.Define(inst, Type::U64)}; + ctx.Add("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", ret, pointer_offset, + pointer_offset); + ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset, + value, pointer_offset, value); +} void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { @@ -45,7 +117,7 @@ void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const std::string u32_value{fmt::format("uint({})", value)}; - CasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); } void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -56,7 +128,7 @@ void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const std::string u32_value{fmt::format("uint({})", value)}; - CasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); + SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); } void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -66,12 +138,12 @@ void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& 
binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasIncrement"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); } void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasDecrement"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); } void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -97,7 +169,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); ctx.Add("ssbo{}[{}]+=unpackUint2x32({}).x;ssbo{}[{}]+=unpackUint2x32({}).y;", binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); @@ -106,7 +178,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + ctx.AddS64("{}=packInt2x32(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); ctx.Add("for(int i=0;i<2;++i){{ " "ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", @@ -116,7 +188,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); ctx.Add( "for(int i=0;i<2;++i){{ ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", @@ -126,7 +198,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + ctx.AddS64("{}=packInt2x32(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); ctx.Add("for(int i=0;i<2;++i){{ " "ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", @@ -136,7 +208,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), + ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, 
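// Why these hunks swap uint64_t(uvec2(...)) for packUint2x32(uvec2(...)): a GLSL
// scalar constructor applied to a vector keeps only the first component, so
// uint64_t(uvec2(lo, hi)) yielded uint64_t(lo) and silently dropped the high
// word. packUint2x32/packInt2x32 (GL_ARB_gpu_shader_int64) combine both halves:
//
//   uint64_t v = packUint2x32(uvec2(lo, hi)); // v == uint64_t(lo) | (uint64_t(hi) << 32)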
binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); ctx.Add( "for(int i=0;i<2;++i){{ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", @@ -145,65 +217,69 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=uint64_t(uvec2(atomicAnd(ssbo{}[{}],unpackUint2x32({}).x),atomicAnd(ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd(ssbo{}[{}],unpackUint2x32({}).x),atomicAnd(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=uint64_t(uvec2(atomicOr(ssbo{}[{}],unpackUint2x32({}).x),atomicOr(ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicOr(ssbo{}[{}],unpackUint2x32({}).x),atomicOr(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=uint64_t(uvec2(atomicXor(ssbo{}[{}],unpackUint2x32({}).x),atomicXor(ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor(ssbo{}[{}],unpackUint2x32({}).x),atomicXor(ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=uint64_t(uvec2(atomicExchange(ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" - "ssbo{}[{}],unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicExchange(ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" + "ssbo{}[{}],unpackUint2x32({}).y)));", + inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); + SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); } void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); } void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); } void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, 
std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); } void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); } void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); } void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - CasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); + SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); } void EmitGlobalAtomicIAdd32(EmitContext&) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 1e7247358..703db80ee 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -421,29 +421,29 @@ void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lh std::string_view rhs); void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, 
std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 088c86f30..22ea9c9b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -324,66 +324,6 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - -void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, - std::string_view value) { - NotImplemented(); -} - void EmitBindlessImageAtomicIAdd32(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c22e5992a..dc78cdefb 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -703,6 +703,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorage128: info.used_storage_buffer_types |= IR::Type::U32x4; break; + case IR::Opcode::SharedAtomicSMin32: + info.uses_atomic_s32_min = true; + break; + case IR::Opcode::SharedAtomicSMax32: + info.uses_atomic_s32_max = true; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; From 1269a0cf8b3844c1a9bb06c843a7698b0a9643d5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 17:27:00 -0400 
Subject: [PATCH 616/770] glsl: Rework variable allocator to allow for variable reuse --- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/glsl/emit_context.h | 36 +-- .../backend/glsl/emit_glsl.cpp | 29 +- .../backend/glsl/emit_glsl_atomic.cpp | 11 +- .../glsl/emit_glsl_bitwise_conversion.cpp | 2 +- .../glsl/emit_glsl_context_get_set.cpp | 14 +- .../backend/glsl/emit_glsl_image.cpp | 26 +- .../backend/glsl/emit_glsl_integer.cpp | 10 +- .../backend/glsl/emit_glsl_memory.cpp | 28 +- .../glsl/emit_glsl_not_implemented.cpp | 10 +- .../backend/glsl/reg_alloc.cpp | 191 ------------ .../backend/glsl/reg_alloc.h | 84 ----- .../backend/glsl/var_alloc.cpp | 290 ++++++++++++++++++ .../backend/glsl/var_alloc.h | 100 ++++++ 14 files changed, 482 insertions(+), 353 deletions(-) delete mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.cpp delete mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.h create mode 100644 src/shader_recompiler/backend/glsl/var_alloc.cpp create mode 100644 src/shader_recompiler/backend/glsl/var_alloc.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 9b2240931..e89a0ee16 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -48,8 +48,8 @@ add_library(shader_recompiler STATIC backend/glsl/emit_glsl_special.cpp backend/glsl/emit_glsl_undefined.cpp backend/glsl/emit_glsl_warp.cpp - backend/glsl/reg_alloc.cpp - backend/glsl/reg_alloc.h + backend/glsl/var_alloc.cpp + backend/glsl/var_alloc.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 7ae7c8766..2f1062954 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -10,7 +10,7 @@ #include -#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/backend/glsl/var_alloc.h" #include "shader_recompiler/stage.h" namespace Shader { @@ -35,81 +35,81 @@ public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_); - template + template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { - code += fmt::format(format_str, reg_alloc.Define(inst, type), std::forward(args)...); + code += fmt::format(format_str, var_alloc.Define(inst, type), std::forward(args)...); // TODO: Remove this code += '\n'; } template void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddS32(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddS64(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddU64(const char* format_str, IR::Inst& inst, Args&&... 
args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); + Add(format_str, inst, args...); } template @@ -121,7 +121,7 @@ public: std::string header; std::string code; - RegAlloc reg_alloc; + VarAlloc var_alloc; const Info& info; const Profile& profile; const RuntimeInfo& runtime_info; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index bac4b830d..4304ee4d5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -33,7 +33,7 @@ void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { template auto Arg(EmitContext& ctx, const IR::Value& arg) { if constexpr (std::is_same_v) { - return ctx.reg_alloc.Consume(arg); + return ctx.var_alloc.Consume(arg); } else if constexpr (std::is_same_v) { return arg; } else if constexpr (std::is_same_v) { @@ -131,7 +131,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } break; case IR::AbstractSyntaxNode::Type::If: - ctx.Add("if ({}){{", ctx.reg_alloc.Consume(node.data.if_node.cond)); + ctx.Add("if ({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); break; case IR::AbstractSyntaxNode::Type::EndIf: ctx.Add("}}"); @@ -142,7 +142,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("break;"); } } else { - ctx.Add("if({}){{break;}}", ctx.reg_alloc.Consume(node.data.break_node.cond)); + ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); } break; case IR::AbstractSyntaxNode::Type::Return: @@ -153,7 +153,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if({}){{", ctx.reg_alloc.Consume(node.data.repeat.cond)); + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.repeat.cond)); ctx.Add("continue;\n}}else{{"); ctx.Add("break;\n}}\n}}"); break; @@ -171,6 +171,23 @@ std::string GlslVersionSpecifier(const EmitContext& ctx) { } return ""; } + +void DefineVariables(const EmitContext& ctx, std::string& header) { + for (u32 i = 0; i < static_cast(GlslVarType::Void); ++i) { + const auto type{static_cast(i)}; + const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; + const auto type_name{ctx.var_alloc.GetGlslType(type)}; + // Temps/return types that are never used are stored at index 0 + if (tracker.uses_temp) { + header += fmt::format("{}{}={}(0);", type_name, ctx.var_alloc.Representation(0, type), + type_name); + } + for (u32 index = 1; index <= 
tracker.num_used; ++index) { + header += fmt::format("{}{}={}(0);", type_name, + ctx.var_alloc.Representation(index, type), type_name); + } + } +} } // Anonymous namespace std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, @@ -190,9 +207,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; } - for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { - ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); - } + DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { ctx.header += "uint carry;"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 16791be84..918f90058 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -20,14 +20,14 @@ for (;;){{ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, std::string_view value, std::string_view function) { - const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string smem{fmt::format("smem[{}/4]", offset)}; ctx.Add(cas_loop.data(), ret, smem, ret, smem, function, smem, value, ret); } void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { - const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); } @@ -36,7 +36,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi const IR::Value& offset, std::string_view value, std::string_view function) { const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - const auto ret{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); } @@ -102,9 +102,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); - const auto ret{ctx.reg_alloc.Define(inst, Type::U64)}; - ctx.Add("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", ret, pointer_offset, - pointer_offset); + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset, + pointer_offset); ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset, value, pointer_offset, value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 3e8c648b1..2b08aa593 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -26,7 +26,7 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { } void 
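// Design note on the allocator rework (the scheme as inferred from these hunks):
// rather than giving every SSA definition its own numbered register, VarAlloc
// tracks usage per GlslVarType so that variable slots can be reused once their
// defining instruction has no outstanding uses. DefineVariables() then
// predeclares only the number of simultaneously live variables of each type,
// all zero-initialized; the names below are illustrative of what
// Representation() produces:
//
//   uint u32_1=uint(0);uint u32_2=uint(0);vec4 f32x4_1=vec4(0);...
//
// Index 0 of each pool is reserved for values that are defined but never read
// (the uses_temp case above).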
EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { - ctx.AddU1("{}={};", inst, ctx.reg_alloc.Consume(value)); + ctx.AddU1("{}={};", inst, ctx.var_alloc.Consume(value)); } void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 46ce413bf..8688686e8 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -29,7 +29,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32( "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); @@ -44,7 +44,7 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32( "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); @@ -59,7 +59,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/" "2)%2)*16),16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); @@ -74,7 +74,7 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32( "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); @@ -87,7 +87,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -99,7 +99,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ctx.AddF32("{}={}_cbuf{}[{}].{};", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -114,7 
+114,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, OffsetSwizzle(offset.U32() + 4)); } else { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e63e3f2bd..eb427d8b5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -104,12 +104,12 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; - const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (!offset.IsEmpty()) { ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, - CastToIntVec(ctx.reg_alloc.Consume(offset), info), bias); + CastToIntVec(ctx.var_alloc.Consume(offset), info), bias); } else { if (ctx.stage == Stage::Fragment) { ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); @@ -122,7 +122,7 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", - *sparse_inst, texture, coords, CastToIntVec(ctx.reg_alloc.Consume(offset), info), + *sparse_inst, texture, coords, CastToIntVec(ctx.var_alloc.Consume(offset), info), texel, bias); } else { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, @@ -143,12 +143,12 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse throw NotImplementedException("Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; - const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (!offset.IsEmpty()) { ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, - CastToIntVec(ctx.reg_alloc.Consume(offset), info)); + CastToIntVec(ctx.var_alloc.Consume(offset), info)); } else { ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); } @@ -158,7 +158,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, - CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel); + CastToIntVec(ctx.var_alloc.Consume(offset), info), texel); } else { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, texture, coords, lod_lc, texel); @@ -232,7 +232,7 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins [[maybe_unused]] const IR::Value& offset2) { const auto info{inst.Flags()}; const auto texture{Texture(ctx, info, index)}; - const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const 
auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (offset.IsEmpty()) { @@ -242,7 +242,7 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins } if (offset2.IsEmpty()) { ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, - CastToIntVec(ctx.reg_alloc.Consume(offset), info), info.gather_component); + CastToIntVec(ctx.var_alloc.Consume(offset), info), info.gather_component); return; } // PTP @@ -259,7 +259,7 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins if (offset2.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", *sparse_inst, texture, CastToIntVec(coords, info), - CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel, info.gather_component); + CastToIntVec(ctx.var_alloc.Consume(offset), info), texel, info.gather_component); } // PTP const auto offsets{PtpOffsets(offset, offset2)}; @@ -276,7 +276,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: [[maybe_unused]] std::string_view dref) { const auto info{inst.Flags()}; const auto texture{Texture(ctx, info, index)}; - const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (offset.IsEmpty()) { @@ -285,7 +285,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: } if (offset2.IsEmpty()) { ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, - CastToIntVec(ctx.reg_alloc.Consume(offset), info)); + CastToIntVec(ctx.var_alloc.Consume(offset), info)); return; } // PTP @@ -301,7 +301,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: if (offset2.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), dref, - CastToIntVec(ctx.reg_alloc.Consume(offset), info), texel); + CastToIntVec(ctx.var_alloc.Consume(offset), info), texel); } // PTP const auto offsets{PtpOffsets(offset, offset2)}; @@ -323,7 +323,7 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst } const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; - const auto texel{ctx.reg_alloc.Define(inst, Type::F32x4)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; if (!sparse_inst) { if (!offset.empty()) { ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 4a3d66c90..1c7413cd4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -29,7 +29,7 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { } } // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { ctx.uses_cc_carry = true; ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); @@ -130,7 +130,7 @@ void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, 
std::string_view base, void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { - const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); @@ -138,7 +138,7 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view bas void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view offset, std::string_view count) { - const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); @@ -184,7 +184,7 @@ void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { - const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); @@ -192,7 +192,7 @@ void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std: void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, std::string_view max) { - const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index c66b7b788..8ce186733 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -12,7 +12,7 @@ namespace Shader::Backend::GLSL { void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int({}%4)*8,8);", inst, binding.U32(), offset_var, offset_var); } @@ -20,7 +20,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int({}%4)*8,8);", inst, binding.U32(), offset_var, offset_var); } @@ -28,7 +28,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; 
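// Sketch of the emitted code (allocator names hypothetical): a U16 load at byte
// offset u_7 from binding 0 becomes
//   u_8=bitfieldExtract(ssbo0[u_7/4],int((u_7/2)%2)*16,16);
// offset/4 selects the 32-bit SSBO word and (offset/2)%2 the 16-bit half within
// it; the U8 variants use offset%4 and an 8-bit extract width instead.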
ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, binding.U32(), offset_var, offset_var); } @@ -36,27 +36,27 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, binding.U32(), offset_var, offset_var); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=ssbo{}[{}/4];", inst, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[({}+4)/4]);", inst, binding.U32(), offset_var, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[({}+4)/4],ssbo{}[({}+8)/4],ssbo{}[({}+12)/4]);", inst, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var); @@ -66,7 +66,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), offset_var, binding.U32(), offset_var, value, offset_var); } @@ -75,7 +75,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), offset_var, binding.U32(), offset_var, value, offset_var); } @@ -84,7 +84,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), offset_var, binding.U32(), offset_var, value, offset_var); } @@ -93,20 +93,20 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), offset_var, binding.U32(), offset_var, value, offset_var); } void 
EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]={};", binding.U32(), offset_var, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); } @@ -115,7 +115,7 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - const auto offset_var{ctx.reg_alloc.Consume(offset)}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); ctx.Add("ssbo{}[({}+8)/4]={}.z;", binding.U32(), offset_var, value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 22ea9c9b1..806c4777b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -21,11 +21,11 @@ static void NotImplemented() { void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { - ctx.reg_alloc.Consume(phi.Arg(i)); + ctx.var_alloc.Consume(phi.Arg(i)); } if (!phi.Definition().is_valid) { // The phi node wasn't forward defined - ctx.Add("{};", ctx.reg_alloc.Define(phi, phi.Arg(0).Type())); + ctx.Add("{};", ctx.var_alloc.Define(phi, phi.Arg(0).Type())); } } @@ -42,10 +42,10 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& const auto phi_type{phi.Arg(0).Type()}; if (!phi.Definition().is_valid) { // The phi node wasn't forward defined - ctx.Add("{};", ctx.reg_alloc.Define(phi, phi_type)); + ctx.Add("{};", ctx.var_alloc.Define(phi, phi_type)); } - const auto phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; - const auto val_reg{ctx.reg_alloc.Consume(value)}; + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; if (phi_reg == val_reg) { return; } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp deleted file mode 100644 index b1de022d4..000000000 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
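// Context for the deletion below: RegAlloc named every value "R{index}" out of a
// single NUM_REGS-sized pool and recorded each slot's GLSL type on the side in
// reg_types. Its replacement, VarAlloc (introduced further down), keeps one pool
// per GlslVarType instead, so a u32 value becomes e.g. "u_3" and an f32x4 value
// "f4_3" (indices hypothetical), and the type is recoverable from the name alone.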
- -#include <string> -#include <string_view> - -#include <fmt/format.h> - -#include "shader_recompiler/backend/glsl/reg_alloc.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLSL { -namespace { -std::string Representation(Id id) { - if (id.is_condition_code != 0) { - throw NotImplementedException("Condition code"); - } - if (id.is_spill != 0) { - throw NotImplementedException("Spilling"); - } - const u32 index{static_cast<u32>(id.index)}; - return fmt::format("R{}", index); -} - -std::string FormatFloat(std::string_view value, IR::Type type) { - // TODO: Confirm FP64 nan/inf - if (type == IR::Type::F32) { - if (value == "nan") { - return "uintBitsToFloat(0x7fc00000)"; - } - if (value == "inf") { - return "uintBitsToFloat(0x7f800000)"; - } - if (value == "-inf") { - return "uintBitsToFloat(0xff800000)"; - } - } - if (value.find_first_of('e') != std::string_view::npos) { - // scientific notation - const auto cast{type == IR::Type::F32 ? "float" : "double"}; - return fmt::format("{}({})", cast, value); - } - const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; - const bool needs_suffix{!value.ends_with('f')}; - const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; - return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); -} - -std::string MakeImm(const IR::Value& value) { - switch (value.Type()) { - case IR::Type::U1: - return fmt::format("{}", value.U1() ? "true" : "false"); - case IR::Type::U32: - return fmt::format("{}u", value.U32()); - case IR::Type::F32: - return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); - case IR::Type::U64: - return fmt::format("{}ul", value.U64()); - case IR::Type::F64: - return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); - case IR::Type::Void: - return ""; - default: - throw NotImplementedException("Immediate type {}", value.Type()); - } -} -} // Anonymous namespace - -std::string RegAlloc::Define(IR::Inst& inst) { - const Id id{Alloc()}; - inst.SetDefinition(id); - return Representation(id); -} - -std::string RegAlloc::Define(IR::Inst& inst, Type type) { - const Id id{Alloc()}; - std::string type_str = ""; - if (!register_defined[id.index]) { - register_defined[id.index] = true; - // type_str = GetGlslType(type); - reg_types.push_back(GetGlslType(type)); - ++num_used_registers; - } - inst.SetDefinition(id); - return type_str + Representation(id); -} - -std::string RegAlloc::Define(IR::Inst& inst, IR::Type type) { - return Define(inst, RegType(type)); -} - -std::string RegAlloc::Consume(const IR::Value& value) { - return value.IsImmediate() ?
MakeImm(value) : Consume(*value.InstRecursive()); -} - -std::string RegAlloc::Consume(IR::Inst& inst) { - inst.DestructiveRemoveUsage(); - // TODO: reuse variables of same type if possible - // if (!inst.HasUses()) { - // Free(id); - // } - return Representation(inst.Definition()); -} - -Type RegAlloc::RegType(IR::Type type) { - switch (type) { - case IR::Type::U1: - return Type::U1; - case IR::Type::U32: - return Type::U32; - case IR::Type::F32: - return Type::F32; - case IR::Type::U64: - return Type::U64; - case IR::Type::F64: - return Type::F64; - default: - throw NotImplementedException("IR type {}", type); - } -} - -std::string RegAlloc::GetGlslType(Type type) { - switch (type) { - case Type::U1: - return "bool "; - case Type::F16x2: - return "f16vec2 "; - case Type::U32: - return "uint "; - case Type::S32: - return "int "; - case Type::F32: - return "float "; - case Type::S64: - return "int64_t "; - case Type::U64: - return "uint64_t "; - case Type::F64: - return "double "; - case Type::U32x2: - return "uvec2 "; - case Type::F32x2: - return "vec2 "; - case Type::U32x3: - return "uvec3 "; - case Type::F32x3: - return "vec3 "; - case Type::U32x4: - return "uvec4 "; - case Type::F32x4: - return "vec4 "; - case Type::Void: - return ""; - default: - throw NotImplementedException("Type {}", type); - } -} - -std::string RegAlloc::GetGlslType(IR::Type type) { - return GetGlslType(RegType(type)); -} - -Id RegAlloc::Alloc() { - if (num_used_registers < NUM_REGS) { - for (size_t reg = 0; reg < NUM_REGS; ++reg) { - if (register_use[reg]) { - continue; - } - register_use[reg] = true; - Id ret{}; - ret.is_valid.Assign(1); - ret.is_long.Assign(0); - ret.is_spill.Assign(0); - ret.is_condition_code.Assign(0); - ret.index.Assign(static_cast<u32>(reg)); - return ret; - } - } - throw NotImplementedException("Register spilling"); -} - -void RegAlloc::Free(Id id) { - if (id.is_spill != 0) { - throw NotImplementedException("Free spill"); - } - register_use[id.index] = false; -} - -} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h deleted file mode 100644 index 6c293f9d1..000000000 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included.
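// Layout sketch of the Id bitfield deleted below, taken from its declarations:
//   bit 0      is_valid
//   bit 1      is_long
//   bit 2      is_spill            (never implemented; Alloc() throws on overflow)
//   bit 3      is_condition_code
//   bit 4      is_null
//   bits 5-31  index               (27 bits, comfortably covering NUM_REGS = 4096)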
- -#pragma once - -#include <bitset> -#include <string> - -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Shader::IR { -class Inst; -class Value; -enum class Type; -} // namespace Shader::IR - -namespace Shader::Backend::GLSL { -enum class Type : u32 { - U1, - F16x2, - S32, - U32, - F32, - S64, - U64, - F64, - U32x2, - F32x2, - U32x3, - F32x3, - U32x4, - F32x4, - Void, -}; - -struct Id { - union { - u32 raw; - BitField<0, 1, u32> is_valid; - BitField<1, 1, u32> is_long; - BitField<2, 1, u32> is_spill; - BitField<3, 1, u32> is_condition_code; - BitField<4, 1, u32> is_null; - BitField<5, 27, u32> index; - }; - - bool operator==(Id rhs) const noexcept { - return raw == rhs.raw; - } - bool operator!=(Id rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(sizeof(Id) == sizeof(u32)); - -class RegAlloc { -public: - std::string Define(IR::Inst& inst); - std::string Define(IR::Inst& inst, Type type); - std::string Define(IR::Inst& inst, IR::Type type); - - std::string Consume(const IR::Value& value); - std::string Consume(IR::Inst& inst); - - std::string GetGlslType(Type type); - std::string GetGlslType(IR::Type type); - - size_t num_used_registers{}; - std::vector<std::string> reg_types; - -private: - static constexpr size_t NUM_REGS = 4096; - - Type RegType(IR::Type type); - Id Alloc(); - void Free(Id id); - - std::bitset<NUM_REGS> register_use{}; - std::bitset<NUM_REGS> register_defined{}; -}; - -} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..8c6944f07 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -0,0 +1,290 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/backend/glsl/var_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +std::string TypePrefix(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return "b_"; + case GlslVarType::F16x2: + return "f16x2_"; + case GlslVarType::U32: + return "u_"; + case GlslVarType::S32: + return "s_"; + case GlslVarType::F32: + return "f_"; + case GlslVarType::S64: + return "s64_"; + case GlslVarType::U64: + return "u64_"; + case GlslVarType::F64: + return "d_"; + case GlslVarType::U32x2: + return "u2_"; + case GlslVarType::F32x2: + return "f2_"; + case GlslVarType::U32x3: + return "u3_"; + case GlslVarType::F32x3: + return "f3_"; + case GlslVarType::U32x4: + return "u4_"; + case GlslVarType::F32x4: + return "f4_"; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +std::string FormatFloat(std::string_view value, IR::Type type) { + // TODO: Confirm FP64 nan/inf + if (type == IR::Type::F32) { + if (value == "nan") { + return "uintBitsToFloat(0x7fc00000)"; + } + if (value == "inf") { + return "uintBitsToFloat(0x7f800000)"; + } + if (value == "-inf") { + return "uintBitsToFloat(0xff800000)"; + } + } + if (value.find_first_of('e') != std::string_view::npos) { + // scientific notation + const auto cast{type == IR::Type::F32 ? "float" : "double"}; + return fmt::format("{}({})", cast, value); + } + const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; + const bool needs_suffix{!value.ends_with('f')}; + const auto suffix{type == IR::Type::F32 ?
"f" : "lf"}; + return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? "true" : "false"); + case IR::Type::U32: + return fmt::format("{}u", value.U32()); + case IR::Type::F32: + return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); + case IR::Type::U64: + return fmt::format("{}ul", value.U64()); + case IR::Type::F64: + return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string VarAlloc::Representation(u32 index, GlslVarType type) const { + const auto prefix{TypePrefix(type)}; + return fmt::format("{}{}", prefix, index); +} + +std::string VarAlloc::Representation(Id id) const { + return Representation(id.index, id.type); +} + +std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition(Alloc(type)); + return Representation(inst.Definition()); + } else { + Id id{}; + id.type.Assign(type); + // id.is_null.Assign(1); + GetUseTracker(type).uses_temp = true; + inst.SetDefinition(id); + } + return Representation(inst.Definition()); +} + +std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { + return Define(inst, RegType(type)); +} + +std::string VarAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); +} + +std::string VarAlloc::ConsumeInst(IR::Inst& inst) { + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(inst.Definition()); + } + return Representation(inst.Definition()); +} + +std::string VarAlloc::GetGlslType(IR::Type type) const { + return GetGlslType(RegType(type)); +} + +Id VarAlloc::Alloc(GlslVarType type) { + auto& use_tracker{GetUseTracker(type)}; + if (use_tracker.num_used < NUM_VARS) { + for (size_t var = 1; var < NUM_VARS; ++var) { + if (use_tracker.var_use[var]) { + continue; + } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast(var)); + return ret; + } + } + throw NotImplementedException("Variable spilling"); +} + +void VarAlloc::Free(Id id) { + if (id.is_valid == 0) { + // throw LogicError("Freeing invalid variable"); + return; + } + auto& use_tracker{GetUseTracker(id.type)}; + use_tracker.var_use[id.index] = false; +} + +GlslVarType VarAlloc::RegType(IR::Type type) const { + switch (type) { + case IR::Type::U1: + return GlslVarType::U1; + case IR::Type::U32: + return GlslVarType::U32; + case IR::Type::F32: + return GlslVarType::F32; + case IR::Type::U64: + return GlslVarType::U64; + case IR::Type::F64: + return GlslVarType::F64; + default: + throw NotImplementedException("IR type {}", type); + } +} + +std::string VarAlloc::GetGlslType(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return "bool "; + case GlslVarType::F16x2: + return "f16vec2 "; + case GlslVarType::U32: + return "uint "; + case GlslVarType::S32: + return "int "; + case GlslVarType::F32: + return "float "; + case GlslVarType::S64: + return "int64_t "; + case GlslVarType::U64: + return "uint64_t "; + case GlslVarType::F64: + return "double "; + case GlslVarType::U32x2: + return "uvec2 "; + case GlslVarType::F32x2: + return "vec2 "; + case GlslVarType::U32x3: + 
return "uvec3 "; + case GlslVarType::F32x3: + return "vec3 "; + case GlslVarType::U32x4: + return "uvec4 "; + case GlslVarType::F32x4: + return "vec4 "; + case GlslVarType::Void: + return ""; + default: + throw NotImplementedException("Type {}", type); + } +} + +VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::U32: + return var_u32; + case GlslVarType::S32: + return var_s32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::S64: + return var_s64; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + default: + throw NotImplementedException("Type {}", type); + } +} + +const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { + switch (type) { + case GlslVarType::U1: + return var_bool; + case GlslVarType::F16x2: + return var_f16x2; + case GlslVarType::U32: + return var_u32; + case GlslVarType::S32: + return var_s32; + case GlslVarType::F32: + return var_f32; + case GlslVarType::S64: + return var_s64; + case GlslVarType::U64: + return var_u64; + case GlslVarType::F64: + return var_f64; + case GlslVarType::U32x2: + return var_u32x2; + case GlslVarType::F32x2: + return var_f32x2; + case GlslVarType::U32x3: + return var_u32x3; + case GlslVarType::F32x3: + return var_f32x3; + case GlslVarType::U32x4: + return var_u32x4; + case GlslVarType::F32x4: + return var_f32x4; + default: + throw NotImplementedException("Type {}", type); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..29d78a571 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <bitset> +#include <string> +#include <string_view> + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +enum class Type; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { +enum class GlslVarType : u32 { + U1, + F16x2, + S32, + U32, + F32, + S64, + U64, + F64, + U32x2, + F32x2, + U32x3, + F32x3, + U32x4, + F32x4, + Void, +}; + +struct Id { + union { + u32 raw; + BitField<0, 1, u32> is_valid; + BitField<1, 4, GlslVarType> type; + BitField<5, 27, u32> index; + }; + + bool operator==(Id rhs) const noexcept { + return raw == rhs.raw; + } + bool operator!=(Id rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(sizeof(Id) == sizeof(u32)); + +class VarAlloc { +public: + static constexpr size_t NUM_VARS = 511; + struct UseTracker { + size_t num_used{}; + std::bitset<NUM_VARS> var_use{}; + bool uses_temp{}; + }; + + std::string Define(IR::Inst& inst, GlslVarType type); + std::string Define(IR::Inst& inst, IR::Type type); + + std::string Consume(const IR::Value& value); + std::string ConsumeInst(IR::Inst& inst); + + std::string GetGlslType(GlslVarType type) const; + std::string GetGlslType(IR::Type type) const; + + const UseTracker& GetUseTracker(GlslVarType type) const; + std::string Representation(u32 index, GlslVarType type) const; + +private: + GlslVarType RegType(IR::Type type) const; + Id Alloc(GlslVarType type); + void Free(Id id); + UseTracker& GetUseTracker(GlslVarType type); + std::string Representation(Id id) const; + + UseTracker var_bool{}; + UseTracker var_f16x2{}; + UseTracker var_s32{}; + UseTracker var_u32{}; + UseTracker var_u32x2{}; + UseTracker var_u32x3{}; + UseTracker var_u32x4{}; + UseTracker var_f32{}; + UseTracker var_f32x2{}; + UseTracker var_f32x3{}; + UseTracker var_f32x4{}; + UseTracker var_u64{}; + UseTracker var_s64{}; + UseTracker var_f64{}; +}; + +} // namespace Shader::Backend::GLSL From 9f3ffb996b0d02ca64b492d22ff158e8f3659257 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 19:13:22 -0400 Subject: [PATCH 617/770] glsl: Rework var alloc to not assign unused results --- .../backend/glsl/emit_context.cpp | 8 ++- .../backend/glsl/emit_context.h | 8 ++- .../backend/glsl/emit_glsl_atomic.cpp | 9 ++-- .../glsl/emit_glsl_bitwise_conversion.cpp | 8 ++- .../backend/glsl/emit_glsl_composite.cpp | 50 ++++++++++++------- .../backend/glsl/emit_glsl_instructions.h | 26 +++++----- .../glsl/emit_glsl_not_implemented.cpp | 8 +-- .../backend/glsl/var_alloc.cpp | 17 ++++++- .../backend/glsl/var_alloc.h | 6 +++ 9 files changed, 91 insertions(+), 49 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index b9594de40..da29290a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,11 +122,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; - if (stage != Stage::Compute) { - // TODO: track this usage - header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_EXT_texture_shadow_lod : enable\n"; - } + // TODO: track this usage + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 :
enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 2f1062954..423fc6104 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -37,7 +37,13 @@ public: template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { - code += fmt::format(format_str, var_alloc.Define(inst, type), std::forward(args)...); + const auto var_def{var_alloc.AddDefine(inst, type)}; + if (var_def.empty()) { + // skip assigment. + code += fmt::format(&format_str[3], std::forward(args)...); + } else { + code += fmt::format(format_str, var_def, std::forward(args)...); + } // TODO: Remove this code += '\n'; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 918f90058..db4c60002 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -11,8 +11,7 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view cas_loop{R"({}; -for (;;){{ +static constexpr std::string_view cas_loop{R"(for (;;){{ uint old_value={}; {}=atomicCompSwap({},old_value,{}({},{})); if ({}==old_value){{break;}} @@ -22,14 +21,14 @@ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string smem{fmt::format("smem[{}/4]", offset)}; - ctx.Add(cas_loop.data(), ret, smem, ret, smem, function, smem, value, ret); + ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); } void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; - ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); } void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -37,7 +36,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi std::string_view function) { const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - ctx.Add(cas_loop.data(), ret, ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); } } // namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 2b08aa593..9d844b831 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -26,7 +26,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { } void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { - ctx.AddU1("{}={};", inst, ctx.var_alloc.Consume(value)); + // Fake one usage to get a real variable out of the condition + inst.DestructiveAddUsage(1); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; + const auto input{ctx.var_alloc.Consume(value)}; + if (ret != 
input) { + ctx.Add("{}={};", ret, input); + } } void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 0fd667c8f..44a719fc3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -9,8 +9,14 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { static constexpr std::string_view SWIZZLE{"xyzw"}; - +void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, + std::string_view object, u32 index) { + ctx.Add("{}={};", result, composite); + ctx.Add("{}.{}={};", result, SWIZZLE[index], object); +} +} // namespace void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2) { ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); @@ -41,19 +47,22 @@ void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + CompositeInsert(ctx, ret, composite, object, index); } -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; + CompositeInsert(ctx, ret, composite, object, index); } -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; + CompositeInsert(ctx, ret, composite, object, index); } void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, @@ -146,19 +155,22 @@ void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; + CompositeInsert(ctx, ret, composite, object, index); } -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; + CompositeInsert(ctx, ret, composite, object, 
index); } -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index) { - ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + CompositeInsert(ctx, ret, composite, object, index); } void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 703db80ee..c2e5aff16 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -26,7 +26,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitReference(EmitContext&); +void EmitReference(EmitContext& ctx, const IR::Value& value); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); void EmitBranch(EmitContext& ctx, std::string_view label); void EmitBranchConditional(EmitContext& ctx, std::string_view condition, @@ -165,12 +165,12 @@ void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_vie u32 index); void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, std::string_view e3); @@ -197,12 +197,12 @@ void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_vie u32 index); void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); -void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, - u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + std::string_view object, u32 index); void 
EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 806c4777b..599ff90e0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -25,7 +25,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& phi) { } if (!phi.Definition().is_valid) { // The phi node wasn't forward defined - ctx.Add("{};", ctx.var_alloc.Define(phi, phi.Arg(0).Type())); + ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); } } @@ -33,8 +33,8 @@ void EmitVoid(EmitContext& ctx) { // NotImplemented(); } -void EmitReference(EmitContext&) { - // NotImplemented(); +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); } void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { @@ -42,7 +42,7 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& const auto phi_type{phi.Arg(0).Type()}; if (!phi.Definition().is_valid) { // The phi node wasn't forward defined - ctx.Add("{};", ctx.var_alloc.Define(phi, phi_type)); + ctx.var_alloc.PhiDefine(phi, phi_type); } const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; const auto val_reg{ctx.var_alloc.Consume(value)}; diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 8c6944f07..896457248 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -110,7 +110,6 @@ std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { } else { Id id{}; id.type.Assign(type); - // id.is_null.Assign(1); GetUseTracker(type).uses_temp = true; inst.SetDefinition(id); } @@ -121,6 +120,20 @@ std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { return Define(inst, RegType(type)); } +std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { + return AddDefine(inst, RegType(type)); +} + +std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { + if (inst.HasUses()) { + inst.SetDefinition(Alloc(type)); + return Representation(inst.Definition()); + } else { + return ""; + } + return Representation(inst.Definition()); +} + std::string VarAlloc::Consume(const IR::Value& value) { return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); } @@ -223,6 +236,8 @@ VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { switch (type) { case GlslVarType::U1: return var_bool; + case GlslVarType::F16x2: + return var_f16x2; case GlslVarType::U32: return var_u32; case GlslVarType::S32: diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h index 29d78a571..574960b1a 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.h +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -62,9 +62,15 @@ public: bool uses_temp{}; }; + /// Used for explicit usages of variables, may revert to temporaries std::string Define(IR::Inst& inst, GlslVarType type); std::string Define(IR::Inst& inst, IR::Type type); + /// Used to assign variables used by the IR. May return a blank string if + /// the instruction's result is unused in the IR. 
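/// For example, with the format "{}=ftou({});" (ftou chosen arbitrarily): if this
/// returns a name, the whole format is used; if it returns "", the caller
/// (EmitContext::Add) formats &format_str[3] instead, skipping the leading three
/// characters "{}=" so the expression is emitted without an assignment.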
+ std::string AddDefine(IR::Inst& inst, GlslVarType type); + std::string PhiDefine(IR::Inst& inst, IR::Type type); + std::string Consume(const IR::Value& value); std::string ConsumeInst(IR::Inst& inst); From 8ec0028e687f0ace26e0a4abf8ffbe1abb19653d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 19:21:30 -0400 Subject: [PATCH 618/770] glsl: Implement set clip distance, and add a missed diff in emit_glsl relating to var alloc ref counting --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 3 +++ .../backend/glsl/emit_glsl_context_get_set.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 4304ee4d5..f0257db7f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -112,6 +112,9 @@ void PrecolorInst(IR::Inst& phi) { ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); } } + for (size_t i = 0; i < num_args; ++i) { + IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); + } } void Precolor(const IR::Program& program) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 8688686e8..8f5f94752 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -186,6 +186,18 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::PositionW: ctx.Add("gl_Position.{}={};", swizzle, value); break; + case IR::Attribute::ClipDistance0: + case IR::Attribute::ClipDistance1: + case IR::Attribute::ClipDistance2: + case IR::Attribute::ClipDistance3: + case IR::Attribute::ClipDistance4: + case IR::Attribute::ClipDistance5: + case IR::Attribute::ClipDistance6: + case IR::Attribute::ClipDistance7: { + const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; + ctx.Add("gl_ClipDistance[{}]={};", index, value); + break; + } default: fmt::print("Set attribute {}", attr); throw NotImplementedException("Set attribute {}", attr); From 4b5a4ea72e471eaf7d3fae34746e49aa9a6aaf08 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 20:02:44 -0400 Subject: [PATCH 619/770] glsl: Fix ssbo indexing and name shadowing between shader stages --- .../backend/glsl/emit_context.cpp | 9 +- .../backend/glsl/emit_glsl_atomic.cpp | 106 ++++++++++-------- .../backend/glsl/emit_glsl_memory.cpp | 63 ++++++----- 3 files changed, 101 insertions(+), 77 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index da29290a2..788679f40 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -121,7 +121,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } void EmitContext::SetupExtensions(std::string&) { - header += "#extension GL_ARB_separate_shader_objects : enable\n"; // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n"; header += "#extension GL_EXT_texture_shadow_lod : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -171,11 +170,13 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { if (info.storage_buffers_descriptors.empty()) { return; } + u32 index{}; for (const auto& desc : info.storage_buffers_descriptors) { -
header += - fmt::format("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", - bindings.storage_buffer, bindings.storage_buffer, bindings.storage_buffer); + header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", + bindings.storage_buffer, stage_name, bindings.storage_buffer, + stage_name, index); bindings.storage_buffer += desc.count; + index += desc.count; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index db4c60002..d3301054c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -27,14 +27,16 @@ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string ssbo{ + fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())}; ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); } void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { - const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string ssbo{ + fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())}; const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); @@ -109,7 +111,8 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAdd(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -120,7 +123,8 @@ void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMin(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -131,7 +135,8 @@ void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMax(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -146,103 +151,116 @@ void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& b void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& 
inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAnd(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicOr(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicXor(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), + value); } void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicExchange(ssbo{}[{}],{});", inst, binding.U32(), offset.U32(), value); + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), + offset.U32(), value); } void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); - ctx.Add("ssbo{}[{}]+=unpackUint2x32({}).x;ssbo{}[{}]+=unpackUint2x32({}).y;", binding.U32(), - offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, + binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.Add("{}_ssbo{}[{}]+=unpackUint2x32({}).x;{}_ssbo{}[{}]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), + offset.U32() + 1, value); } void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); + ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, + binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); ctx.Add("for(int i=0;i<2;++i){{ " - "ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + "{}_ssbo{}[{}+i]=uint(min(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", + ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), + offset.U32(), value); } void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); - ctx.Add( - "for(int i=0;i<2;++i){{ ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + 
ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, + binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[{}+i]=min({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), + offset.U32(), value); } void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); + ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, + binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); ctx.Add("for(int i=0;i<2;++i){{ " - "ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + "{}_ssbo{}[{}+i]=uint(max(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", + ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), + offset.U32(), value); } void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), - binding.U32(), offset.U32() + 1); - ctx.Add( - "for(int i=0;i<2;++i){{ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, + binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[{}+i]=max({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), + offset.U32(), value); } void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64( - "{}=packUint2x32(uvec2(atomicAnd(ssbo{}[{}],unpackUint2x32({}).x),atomicAnd(ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, + binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { ctx.AddU64( - "{}=packUint2x32(uvec2(atomicOr(ssbo{}[{}],unpackUint2x32({}).x),atomicOr(ssbo{}[{}]," + "{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}],unpackUint2x32({}).x),atomicOr({}_ssbo{}[{}]," "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), + offset.U32() + 1, value); } void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64( - 
"{}=packUint2x32(uvec2(atomicXor(ssbo{}[{}],unpackUint2x32({}).x),atomicXor(ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[{}]," + "unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, + binding.U32(), offset.U32() + 1, value); } void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { ctx.AddU64( - "{}=packUint2x32(uvec2(atomicExchange(ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" - "ssbo{}[{}],unpackUint2x32({}).y)));", - inst, binding.U32(), offset.U32(), value, binding.U32(), offset.U32() + 1, value); + "{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" + "{}_ssbo{}[{}],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), + offset.U32() + 1, value); } void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 8ce186733..78fbb9d6e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -13,53 +13,54 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int({}%4)*8,8);", inst, binding.U32(), offset_var, - offset_var); + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); } void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int({}%4)*8,8);", inst, binding.U32(), - offset_var, offset_var); + ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); } void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, binding.U32(), - offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name, + binding.U32(), offset_var, offset_var); } void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, binding.U32(), - offset_var, offset_var); + ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - 
ctx.AddU32("{}=ssbo{}[{}/4];", inst, binding.U32(), offset_var); + ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[({}+4)/4]);", inst, binding.U32(), offset_var, - binding.U32(), offset_var); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[({}+4)/4],ssbo{}[({}+8)/4],ssbo{}[({}+12)/4]);", - inst, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), - offset_var, binding.U32(), offset_var); + ctx.AddU32x4( + "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, + ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); } void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, @@ -67,8 +68,9 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), - offset_var, binding.U32(), offset_var, value, offset_var); + ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, + offset_var); } void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, @@ -76,8 +78,9 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), - offset_var, binding.U32(), offset_var, value, offset_var); + ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, + binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, + offset_var); } void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, @@ -85,8 +88,9 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), - offset_var, binding.U32(), offset_var, value, offset_var); + ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", + ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, + value, offset_var); } void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, @@ -94,21 +98,22 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), - offset_var, binding.U32(), offset_var, value, 
offset_var); + ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", + ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, + value, offset_var); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]={};", binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, @@ -116,9 +121,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}+4)/4]={}.y;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}+8)/4]={}.z;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[({}+12)/4]={}.w;", binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); } } // namespace Shader::Backend::GLSL From 14bfb4719ad366745b5d16452914c4c78e43b8ae Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 22:42:52 -0400 Subject: [PATCH 620/770] HACK glsl: Write defaults to unused generic attributes --- src/shader_recompiler/backend/glsl/emit_context.cpp | 7 +++++-- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 788679f40..8de33b582 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -110,8 +110,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { - header += fmt::format("layout(location={}) out vec4 out_attr{};", index, index); + // TODO: Properly resolve attribute issues + const auto declaration{ + fmt::format("layout(location={}) out vec4 out_attr{};", index, index)}; + if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { + header += declaration; } } DefineConstantBuffers(bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index f0257db7f..19cf4e46b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -209,6 +209,12 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += "void main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < program.info.stores_generics.size() / 2; ++index) { + if (!program.info.stores_generics[index]) { + ctx.header += fmt::format("out_attr{}=vec4(0,0,0,1);", index); + } + } } DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { From e10366974edd7c75111d0bef16daf941db9e9a30 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 22:44:28 -0400 Subject: [PATCH 621/770] glsl: Implement precise fp variable allocation --- .../backend/glsl/emit_context.h | 10 +++++ .../backend/glsl/emit_glsl_floating_point.cpp | 41 ++++++++++++++++--- .../backend/glsl/var_alloc.cpp | 16 ++++++++ .../backend/glsl/var_alloc.h | 8 +++- 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 423fc6104..48786a2c7 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -118,6 +118,16 @@ public: Add(format_str, inst, args...); } + template + void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + + template + void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void Add(const char* format_str, Args&&... args) { code += fmt::format(format_str, std::forward(args)...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 5f9603602..342d4efb2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { @@ -20,6 +21,10 @@ void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string } ctx.code += ";"; } + +bool Precise(IR::Inst& inst) { + return {inst.Flags().no_contraction}; +} } // namespace void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -41,11 +46,19 @@ void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddF32("{}=float({})+float({});", inst, a, b); + if (Precise(inst)) { + ctx.AddPrecF32("{}=float({})+float({});", inst, a, b); + } else { + ctx.AddF32("{}=float({})+float({});", inst, a, b); + } } void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddF64("{}=double({})+double({});", inst, a, b); + if (Precise(inst)) { + ctx.AddPrecF64("{}=double({})+double({});", inst, a, b); + } else { + ctx.AddF64("{}=double({})+double({});", inst, a, b); + } } void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -56,12 +69,20 @@ void EmitFPFma16([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::Inst& i void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c) { - ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + if (Precise(inst)) { + ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); + } } void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c) { - ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + if (Precise(inst)) { + ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); + } else { + ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); + } } void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -86,11 +107,19 @@ void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddF32("{}={}*{};", inst, a, b); + if (Precise(inst)) { + ctx.AddPrecF32("{}={}*{};", inst, a, b); + } else { + ctx.AddF32("{}={}*{};", inst, a, b); + } } void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddF64("{}={}*{};", inst, a, b); + if (Precise(inst)) { + ctx.AddPrecF64("{}={}*{};", inst, a, b); + } else { + ctx.AddF64("{}={}*{};", inst, a, b); + } } void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 896457248..2ecdec7f2 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -43,6 +43,10 @@ std::string TypePrefix(GlslVarType type) { return "u4_"; case GlslVarType::F32x4: return "f4_"; + case GlslVarType::PrecF32: + return "pf_"; + case GlslVarType::PrecF64: + return "pd_"; case GlslVarType::Void: return ""; default: @@ -225,6 +229,10 @@ std::string VarAlloc::GetGlslType(GlslVarType type) const { return "uvec4 "; case GlslVarType::F32x4: return "vec4 "; + case GlslVarType::PrecF32: + return "precise float "; + case GlslVarType::PrecF64: + return "precise double "; case GlslVarType::Void: return ""; default: @@ -262,6 +270,10 @@ VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { return var_u32x4; case GlslVarType::F32x4: return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; default: throw NotImplementedException("Type {}", type); } @@ -297,6 +309,10 @@ const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { return var_u32x4; case GlslVarType::F32x4: return var_f32x4; + case GlslVarType::PrecF32: + return var_precf32; + case GlslVarType::PrecF64: + return var_precf64; default: throw NotImplementedException("Type {}", type); } diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h index 574960b1a..be21a87ea 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.h +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -33,6 +33,8 @@ enum class GlslVarType : u32 { F32x3, U32x4, F32x4, + PrecF32, + PrecF64, Void, }; @@ -40,8 +42,8 @@ struct Id { union { u32 raw; BitField<0, 1, u32> is_valid; - BitField<1, 4, GlslVarType> type; - BitField<5, 27, u32> index; + BitField<1, 5, GlslVarType> type; + BitField<6, 26, u32> index; }; bool operator==(Id rhs) const noexcept { @@ -101,6 +103,8 @@ private: UseTracker var_u64{}; UseTracker 
var_s64{}; UseTracker var_f64{}; + UseTracker var_precf32{}; + UseTracker var_precf64{}; }; } // namespace Shader::Backend::GLSL From b7561226edaefc79eadcfbd3df1b0344b7c4b673 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 00:25:54 -0400 Subject: [PATCH 622/770] glsl: SHFL fix and prefer shift operations over divide in glsl shader --- .../backend/glsl/emit_glsl_atomic.cpp | 22 +++++----- .../glsl/emit_glsl_context_get_set.cpp | 20 ++++----- .../backend/glsl/emit_glsl_memory.cpp | 43 ++++++++++--------- .../backend/glsl/emit_glsl_shared_memory.cpp | 34 +++++++-------- .../backend/glsl/emit_glsl_warp.cpp | 8 ++-- 5 files changed, 64 insertions(+), 63 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index d3301054c..9714ffe33 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -20,7 +20,7 @@ static constexpr std::string_view cas_loop{R"(for (;;){{ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const std::string smem{fmt::format("smem[{}/4]", offset)}; + const std::string smem{fmt::format("smem[{}>>2]", offset)}; ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); } @@ -45,7 +45,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicAdd(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, @@ -56,7 +56,7 @@ void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view p void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicMin(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, @@ -67,7 +67,7 @@ void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view p void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicMax(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, @@ -82,31 +82,31 @@ void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view po void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicAnd(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicOr(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); } void 
EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicXor(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - ctx.AddU32("{}=atomicExchange(smem[{}/4],{});", inst, pointer_offset, value); + ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); } void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2(smem[{}/4],smem[({}+4)/4]));", inst, pointer_offset, + ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, pointer_offset); - ctx.Add("smem[{}/4]=unpackUint2x32({}).x;smem[({}+4)/4]=unpackUint2x32({}).y;", pointer_offset, - value, pointer_offset, value); + ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", + pointer_offset, value, pointer_offset, value); } void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 8f5f94752..8d2abdd94 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -31,7 +31,7 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32( - "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", + "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } } @@ -46,8 +46,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32( - "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int(({}%4)*8),8);", inst, - ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", + inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } } @@ -60,7 +60,7 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ((offset.U32() / 2) % 2) * 16); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]),int((({}/" + ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" "2)%2)*16),16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } @@ -75,9 +75,9 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ((offset.U32() / 2) % 2) * 16); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32( - "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}/4)%4]),int((({}/2)%2)*16),16);", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" + "2)%2)*16),16);", + inst, ctx.stage_name, binding.U32(), offset_var, 
offset_var, offset_var); } } @@ -88,7 +88,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}/4)%4]);", inst, ctx.stage_name, + ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } } @@ -100,7 +100,7 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, OffsetSwizzle(offset.U32())); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddF32("{}={}_cbuf{}[{}/16][({}/4)%4];", inst, ctx.stage_name, binding.U32(), + ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } } @@ -116,7 +116,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" - "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)/4)%4]));", + "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, binding.U32(), offset_var, offset_var); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 78fbb9d6e..a4411b68b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -13,7 +13,7 @@ void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int({}%4)*8,8);", inst, ctx.stage_name, + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -21,7 +21,7 @@ void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int({}%4)*8,8);", inst, ctx.stage_name, + ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -29,7 +29,7 @@ void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, ctx.stage_name, + ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -37,30 +37,31 @@ void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, + ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const 
IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}={}_ssbo{}[{}/4];", inst, ctx.stage_name, binding.U32(), offset_var); + ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4]);", inst, ctx.stage_name, + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x4( - "{}=uvec4({}_ssbo{}[{}/4],{}_ssbo{}[({}+4)/4],{}_ssbo{}[({}+8)/4],{}_ssbo{}[({}+12)/4]);", - inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, - ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); + ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" + "+12)>>2]);", + inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), + offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, + binding.U32(), offset_var); } void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, @@ -68,7 +69,7 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, + ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, offset_var); } @@ -78,7 +79,7 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int({}%4)*8,8);", ctx.stage_name, + ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, offset_var); } @@ -88,7 +89,7 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", + ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, offset_var); } @@ -98,7 +99,7 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]=bitfieldInsert({}_ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", + ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, offset_var); } @@ -106,14 +107,14 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& 
ctx, void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]={};", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); - ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, @@ -121,9 +122,9 @@ void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}/4]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); - ctx.Add("{}_ssbo{}[({}+4)/4]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); - ctx.Add("{}_ssbo{}[({}+8)/4]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); - ctx.Add("{}_ssbo{}[({}+12)/4]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); + ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 8a4c69547..578bc349f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -11,70 +11,70 @@ namespace Shader::Backend::GLSL { void EmitLoadSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int({}%4)*8,8);", inst, offset, offset); + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int({}%4)*8,8);", inst, offset, offset); + ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); } void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddU32("{}=bitfieldExtract(smem[{}/4],int(({}/2)%2)*16,16);", inst, offset, offset); + ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); } void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddS32("{}=bitfieldExtract(int(smem[{}/4]),int(({}/2)%2)*16,16);", inst, offset, offset); + ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); 
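// Shared memory is addressed as an array of 32-bit words, so the shift
// forms are drop-in equivalents of the old divisions for these unsigned
// offsets: offset>>2 is offset/4 (the word index) and (offset>>1)%2 is
// (offset/2)%2 (which 16-bit half). For a byte offset of 6 this emits
//   bitfieldExtract(int(smem[6>>2]), int((6>>1)%2)*16, 16)
// i.e. the upper halfword of word 1, exactly what the division forms read.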
} void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddU32("{}=smem[{}/4];", inst, offset); + ctx.AddU32("{}=smem[{}>>2];", inst, offset); } void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddU32x2("{}=uvec2(smem[{}/4],smem[({}+4)/4]);", inst, offset, offset); + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); } void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view offset) { - ctx.AddU32x4("{}=uvec4(smem[{}/4],smem[({}+4)/4],smem[({}+8)/4],smem[({}+12)/4]);", inst, + ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, offset, offset, offset, offset); } void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view value) { - ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int({}%4)*8,8);", offset, offset, value, + ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, offset); } void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view value) { - ctx.Add("smem[{}/4]=bitfieldInsert(smem[{}/4],{},int(({}/2)%2)*16,16);", offset, offset, value, - offset); + ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, + value, offset); } void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view value) { - ctx.Add("smem[{}/4]={};", offset, value); + ctx.Add("smem[{}>>2]={};", offset, value); } void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view value) { - ctx.Add("smem[{}/4]={}.x;", offset, value); - ctx.Add("smem[({}+4)/4]={}.y;", offset, value); + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); } void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view value) { - ctx.Add("smem[{}/4]={}.x;", offset, value); - ctx.Add("smem[({}+4)/4]={}.y;", offset, value); - ctx.Add("smem[({}+8)/4]={}.z;", offset, value); - ctx.Add("smem[({}+12)/4]={}.w;", offset, value); + ctx.Add("smem[{}>>2]={}.x;", offset, value); + ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); + ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); + ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 4286f29c7..0b6c5ad82 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -112,7 +112,7 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, 
std::string_view index, @@ -122,7 +122,7 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, @@ -133,7 +133,7 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, @@ -144,7 +144,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); SetInBoundsFlag(ctx, inst); - ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, From 3a024b302622068f4842715a7f0b31652898a606 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 01:12:52 -0400 Subject: [PATCH 623/770] glsl: Implement gl_ViewportIndex SSBU now working --- src/shader_recompiler/backend/glsl/emit_context.cpp | 10 ++++++++-- .../backend/glsl/emit_glsl_context_get_set.cpp | 3 +++ src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 6 +++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 8de33b582..2a5ec7414 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -126,6 +126,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + header += "#extension GL_NV_viewport_array2 : enable\n"; header += "#extension GL_EXT_texture_shadow_lod : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; @@ -243,9 +245,13 @@ void EmitContext::SetupImages(Bindings& bindings) { } texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { - throw NotImplementedException("TextureType::Buffer"); - texture_buffer_bindings.push_back(bindings.texture); + const auto sampler_type{SamplerType(TextureType::Buffer, false)}; + const auto indices{bindings.texture + desc.count}; + for (u32 index = bindings.texture; index < indices; ++index) { + header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, + sampler_type, index); + } bindings.texture += desc.count; } 
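// With count 1 and a first binding of 0, the loop above emits a declaration
// of the form (assuming SamplerType(TextureType::Buffer, false) resolves to
// "samplerBuffer"):
//   layout(binding=0) uniform samplerBuffer tex0;
// The format call passes bindings.texture rather than index for the binding
// qualifier, so every declaration of a multi-count descriptor repeats the
// descriptor's first binding slot while the identifier advances per index.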
texture_bindings.reserve(info.texture_descriptors.size()); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 8d2abdd94..5f58e781e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -186,6 +186,9 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::PositionW: ctx.Add("gl_Position.{}={};", swizzle, value); break; + case IR::Attribute::ViewportIndex: + ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value); + break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index eb427d8b5..f339f4ade 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -71,7 +71,7 @@ std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); - return ""; + return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; } const IR::Opcode opcode{values[0]->GetOpcode()}; if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { @@ -340,8 +340,8 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst *sparse_inst, texture, CastToIntVec(coords, info), lod, CastToIntVec(offset, info), texel); } else { - ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},{},{}));", *sparse_inst, - texture, CastToIntVec(coords, info), lod, texel); + ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", + *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); } } From df53046d68b26b23ced683396ebc204d96176c8e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 12:53:40 -0400 Subject: [PATCH 624/770] glsl: Use NotImplemented macro with function name output --- .../glsl/emit_glsl_bitwise_conversion.cpp | 4 +- .../backend/glsl/emit_glsl_composite.cpp | 30 +++++------ .../glsl/emit_glsl_context_get_set.cpp | 1 + .../backend/glsl/emit_glsl_convert.cpp | 52 +++++++++--------- .../backend/glsl/emit_glsl_floating_point.cpp | 50 ++++++++--------- .../backend/glsl/emit_glsl_image.cpp | 54 +++++++++---------- .../backend/glsl/emit_glsl_instructions.h | 2 + .../glsl/emit_glsl_not_implemented.cpp | 4 -- .../backend/glsl/emit_glsl_select.cpp | 6 +-- .../backend/glsl/emit_glsl_warp.cpp | 4 +- 10 files changed, 103 insertions(+), 104 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 9d844b831..1e860f11a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -36,7 +36,7 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) } void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBitCastU32F32(EmitContext& 
ctx, IR::Inst& inst, std::string_view value) { @@ -48,7 +48,7 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) } void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 44a719fc3..3697e1a34 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -68,14 +68,14 @@ void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view e1, [[maybe_unused]] std::string_view e2) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view e1, [[maybe_unused]] std::string_view e2, [[maybe_unused]] std::string_view e3) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, @@ -83,46 +83,46 @@ void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view e2, [[maybe_unused]] std::string_view e3, [[maybe_unused]] std::string_view e4) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] std::string_view object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] std::string_view object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view composite, [[maybe_unused]] std::string_view object, [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, @@ -174,27 +174,27 @@ void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view } void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void 
EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 5f58e781e..c66b4b282 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -139,6 +139,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, switch (ctx.stage) { case Stage::VertexA: case Stage::VertexB: + case Stage::Geometry: ctx.AddF32("{}=gl_Position.{};", inst, swizzle); break; case Stage::Fragment: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 85d07b4de..9ed5bb319 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -11,7 +11,7 @@ namespace Shader::Backend::GLSL { void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertS16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -21,12 +21,12 @@ void EmitConvertS16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertS32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -41,7 +41,7 @@ void EmitConvertS32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertS64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -56,22 +56,22 @@ void EmitConvertS64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { 
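// NotImplemented() is presumably the two-line macro this patch adds to
// emit_glsl_instructions.h (see the diffstat above). A minimal sketch of
// what it is assumed to expand to, not the exact definition:
//   #define NotImplemented() \
//       throw NotImplementedException("GLSL instruction {} not implemented", __func__)
// keeping the throw while reporting the name of the unimplemented emitter
// instead of the generic "GLSL Instruction" string.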
- throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertU32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -86,7 +86,7 @@ void EmitConvertU32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertU64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -111,12 +111,12 @@ void EmitConvertU32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -131,52 +131,52 @@ void EmitConvertF64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); 
+ NotImplemented(); } void EmitConvertF32S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -191,7 +191,7 @@ void EmitConvertF32S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -211,12 +211,12 @@ void EmitConvertF32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF64S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -231,12 +231,12 @@ void EmitConvertF64S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitConvertF64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 342d4efb2..49ab182ea 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -29,7 +29,7 @@ bool Precise(IR::Inst& inst) { void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -42,7 +42,7 @@ void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -64,7 +64,7 @@ void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, [[maybe_unused]] std::string_view c) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, @@ -103,7 +103,7 @@ void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { - throw 
NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -124,7 +124,7 @@ void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -166,7 +166,7 @@ void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -175,7 +175,7 @@ void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -190,7 +190,7 @@ void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] std::string_view value, [[maybe_unused]] std::string_view min_value, [[maybe_unused]] std::string_view max_value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, @@ -207,7 +207,7 @@ void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -220,7 +220,7 @@ void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -233,7 +233,7 @@ void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -246,7 +246,7 @@ void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -259,7 +259,7 @@ void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void 
EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -274,7 +274,7 @@ void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -289,7 +289,7 @@ void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -304,7 +304,7 @@ void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -319,7 +319,7 @@ void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lh void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -334,7 +334,7 @@ void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -350,7 +350,7 @@ void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lh void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -366,7 +366,7 @@ void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view l void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -382,7 +382,7 @@ void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -398,7 +398,7 @@ void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw 
NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -414,7 +414,7 @@ void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -430,7 +430,7 @@ void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_v void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, [[maybe_unused]] std::string_view rhs) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, @@ -445,7 +445,7 @@ void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL instruction"); + NotImplemented(); } void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index f339f4ade..3de19cdfe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -387,116 +387,116 @@ void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I [[maybe_unused]] std::string_view derivates, [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod_clamp) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords, [[maybe_unused]] std::string_view color) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageSampleImplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageSampleExplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageGather(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageGatherDref(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageFetch(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageQueryDimensions(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void 
EmitBindlessImageQueryLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageGradient(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageRead(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBindlessImageWrite(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageSampleImplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageSampleExplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageGather(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageGatherDref(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageFetch(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageQueryDimensions(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageQueryLod(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageGradient(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageRead(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitBoundImageWrite(EmitContext&) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index c2e5aff16..5e0195b0f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -21,6 +21,8 @@ class EmitContext; inline void EmitSetLoopSafetyVariable(EmitContext&) {} inline void EmitGetLoopSafetyVariable(EmitContext&) {} +#define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) + // Microinstruction emitters void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 599ff90e0..e3d0b15ba 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -14,10 +14,6 @@ namespace Shader::Backend::GLSL { -static void NotImplemented() { - throw NotImplementedException("GLSL instruction"); -} - void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index d3c8d330f..49fba9073 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -17,13 +17,13 @@ void 
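// The NotImplemented() macro added to emit_glsl_instructions.h above expands
// at each point of use, so __func__ resolves to the name of the stub that was
// hit. A sketch of the expansion inside one stub (assuming the fmt-style
// formatting NotImplementedException already uses elsewhere in this project):
//   void EmitSelectU8(...) {
//       throw NotImplementedException("GLSL instruction {}", __func__);
//       // thrown message: "GLSL instruction EmitSelectU8"
//   }
// Unlike the old uniform "GLSL Instruction" strings, the exception now
// identifies exactly which opcode emitter is missing.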
EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, [[maybe_unused]] std::string_view true_value, [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, [[maybe_unused]] std::string_view true_value, [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, @@ -39,7 +39,7 @@ void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, [[maybe_unused]] std::string_view true_value, [[maybe_unused]] std::string_view false_value) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 0b6c5ad82..fa11c656f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -38,7 +38,7 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, } // namespace void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { @@ -150,7 +150,7 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, [[maybe_unused]] std::string_view swizzle) { - throw NotImplementedException("GLSL Instruction"); + NotImplemented(); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { From 19247ba4fad59b3d821b099dfbcd60e985041249 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 13:28:53 -0400 Subject: [PATCH 625/770] glsl: Implement geometry shaders --- .../backend/glsl/emit_context.cpp | 50 +++++++++++++++++-- .../backend/glsl/emit_glsl.cpp | 2 +- .../glsl/emit_glsl_context_get_set.cpp | 15 +++++- .../glsl/emit_glsl_not_implemented.cpp | 4 +- 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 2a5ec7414..923060386 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -21,6 +21,15 @@ std::string_view InterpDecorator(Interpolation interp) { throw InvalidArgument("Invalid interpolation {}", interp); } +std::string_view ArrayDecorator(Stage stage) { + switch (stage) { + case Stage::Geometry: + return "[1]"; + default: + return ""; + } +} + std::string_view SamplerType(TextureType type, bool is_depth) { if (is_depth) { switch (type) { @@ -64,6 +73,33 @@ std::string_view SamplerType(TextureType type, bool is_depth) { } } +std::string_view InputPrimitive(InputTopology topology) { + switch (topology) { + case InputTopology::Points: + return "points"; + case 
InputTopology::Lines: + return "lines"; + case InputTopology::LinesAdjacency: + return "lines_adjacency"; + case InputTopology::Triangles: + return "triangles"; + case InputTopology::TrianglesAdjacency: + return "triangles_adjacency"; + } + throw InvalidArgument("Invalid input topology {}", topology); +} + +std::string_view OutputPrimitive(OutputTopology topology) { + switch (topology) { + case OutputTopology::PointList: + return "points"; + case OutputTopology::LineStrip: + return "line_strip"; + case OutputTopology::TriangleStrip: + return "triangle_strip"; + } + throw InvalidArgument("Invalid output topology {}", topology); +} } // namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -85,6 +121,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Geometry: stage_name = "gs"; + header += fmt::format("layout({})in;layout({}, max_vertices={})out;\n", + InputPrimitive(runtime_info.input_topology), + OutputPrimitive(program.output_topology), program.output_vertices); break; case Stage::Fragment: stage_name = "fs"; @@ -99,8 +138,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { - header += fmt::format("layout(location={}) {} in vec4 in_attr{};", index, - InterpDecorator(generic.interpolation), index); + header += + fmt::format("layout(location={}){} in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, ArrayDecorator(stage)); } } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { @@ -126,8 +166,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; - header += "#extension GL_NV_viewport_array2 : enable\n"; header += "#extension GL_EXT_texture_shadow_lod : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; @@ -157,6 +195,10 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } + if (info.stores_viewport_index) { + header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + header += "#extension GL_NV_viewport_array2 : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 19cf4e46b..f467d978c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -162,7 +162,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { break; default: fmt::print("{}", node.type); - throw NotImplementedException("{}", node.type); + throw NotImplementedException("AbstractSyntaxNode::Type {}", node.type); break; } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c66b4b282..28e89a0a6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -19,6 +19,15 @@ u32 CbufIndex(u32 offset) { char OffsetSwizzle(u32 offset) { return 
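// Recap of the geometry-shader declarations emitted above (a sketch; topology
// and vertex count come from the runtime info): a program with
// InputTopology::Triangles, OutputTopology::TriangleStrip and
// output_vertices == 4 gets a header beginning
//   layout(triangles)in;layout(triangle_strip, max_vertices=4)out;
// and each used generic input becomes an array via ArrayDecorator:
//   layout(location=0) in vec4 in_attr0[1];
// The viewport extensions are now declared only when
// info.stores_viewport_index is set, instead of unconditionally.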
SWIZZLE[CbufIndex(offset)]; } + +bool IsInputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl || + stage == Stage::TessellationEval; +} + +std::string VertexIndex(EmitContext& ctx, std::string_view vertex) { + return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; +} } // namespace void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -128,7 +137,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.AddF32("{}=in_attr{}.{};", inst, index, swizzle); + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, VertexIndex(ctx, vertex), swizzle); return; } switch (attr) { @@ -139,9 +148,11 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, switch (ctx.stage) { case Stage::VertexA: case Stage::VertexB: - case Stage::Geometry: ctx.AddF32("{}=gl_Position.{};", inst, swizzle); break; + case Stage::Geometry: + ctx.AddF32("{}=gl_in[{}].gl_Position.{};", inst, vertex, swizzle); + break; case Stage::Fragment: ctx.AddF32("{}=gl_FragCoord.{};", inst, swizzle); break; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index e3d0b15ba..9e5715605 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -103,11 +103,11 @@ void EmitEpilogue(EmitContext& ctx) { } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - NotImplemented(); + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { - NotImplemented(); + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); } void EmitGetRegister(EmitContext& ctx) { From 68d075d1e8af66c3f8044b162344bffc943168a8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 14:17:00 -0400 Subject: [PATCH 626/770] glsl: Fix atomic SSBO offsets and implement misc getters --- .../backend/glsl/emit_glsl_atomic.cpp | 133 +++++++++--------- .../backend/glsl/emit_glsl_instructions.h | 2 +- .../glsl/emit_glsl_not_implemented.cpp | 4 +- .../backend/glsl/emit_glsl_warp.cpp | 2 +- 4 files changed, 74 insertions(+), 67 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 9714ffe33..5394f4a8c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -27,16 +27,16 @@ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const std::string ssbo{ - fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); } void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value, std::string_view function) { - const 
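// The SSBO atomic rework below makes two addressing changes (sketch; the
// vs_ssbo0 name is illustrative, following the "{}_ssbo{}" pattern for the
// vertex stage):
// 1. Offsets are consumed from the variable allocator instead of being read
//    as immediates, so dynamically computed offsets now work:
//      ctx.var_alloc.Consume(offset)   // was: offset.U32()
// 2. IR offsets are byte offsets while the ssbo arrays are declared as
//    32-bit words, so the emitted GLSL shifts right by two to index words:
//      vs_ssbo0[offset>>2]             // word index = byte offset / 4
// The 64-bit variants keep emulating on a pair of adjacent words,
//      vs_ssbo0[offset>>2] and vs_ssbo0[(offset>>2)+1],
// combined with packUint2x32/unpackUint2x32, which is why they remain
// non-atomic (see the LOG_WARNING placeholders).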
std::string ssbo{ - fmt::format("{}_ssbo{}[{}]", ctx.stage_name, binding.U32(), offset.U32())}; + const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset))}; const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); @@ -111,8 +111,8 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -123,8 +123,8 @@ void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMin({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -135,8 +135,8 @@ void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicMax({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -151,116 +151,123 @@ void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& b void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicOr({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicXor({}_ssbo{}[{}],{});", inst, ctx.stage_name, binding.U32(), offset.U32(), - value); + ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}],{});", inst, 
ctx.stage_name, binding.U32(), - offset.U32(), value); + ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, - binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); - ctx.Add("{}_ssbo{}[{}]+=unpackUint2x32({}).x;{}_ssbo{}[{}]+=unpackUint2x32({}).y;", - ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), - offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, - binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int i=0;i<2;++i){{ " - "{}_ssbo{}[{}+i]=uint(min(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", - ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), - offset.U32(), value); + "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, - binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int i=0;i<2;++i){{ " - "{}_ssbo{}[{}+i]=min({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", - ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), - offset.U32(), value); + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, - binding.U32(), 
offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int i=0;i<2;++i){{ " - "{}_ssbo{}[{}+i]=uint(max(int({}_ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", - ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), - offset.U32(), value); + "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" + ");}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}],{}_ssbo{}[{}]));", inst, ctx.stage_name, - binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), offset.U32() + 1); + + ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int " - "i=0;i<2;++i){{{}_ssbo{}[{}+i]=max({}_ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", - ctx.stage_name, binding.U32(), offset.U32(), ctx.stage_name, binding.U32(), - offset.U32(), value); + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" + "))[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}],unpackUint2x32({}).x),atomicAnd({}_" - "ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, - binding.U32(), offset.U32() + 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64( - "{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}],unpackUint2x32({}).x),atomicOr({}_ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), - offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64("{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}],unpackUint2x32({}).x),atomicXor({}_" - "ssbo{}[{}]," - "unpackUint2x32({}).y)));", - inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, - binding.U32(), offset.U32() 
+ 1, value); + ctx.AddU64( + "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" + "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - ctx.AddU64( - "{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}],unpackUint2x32({}).x),atomicExchange(" - "{}_ssbo{}[{}],unpackUint2x32({}).y)));", - inst, ctx.stage_name, binding.U32(), offset.U32(), value, ctx.stage_name, binding.U32(), - offset.U32() + 1, value); + ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," + "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); } void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 5e0195b0f..24c36bbda 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -92,7 +92,7 @@ void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitSampleId(EmitContext& ctx, IR::Inst& inst); -void EmitIsHelperInvocation(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); void EmitYDirection(EmitContext& ctx, IR::Inst& inst); void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 9e5715605..3ed4e04d3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -211,8 +211,8 @@ void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitIsHelperInvocation(EmitContext& ctx) { - NotImplemented(); +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); } void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index fa11c656f..38c49b164 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -38,7 +38,7 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, } // namespace void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { - NotImplemented(); + ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { From c7d085b505ab6a766bf37b34030fc9fcb5b662b7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 16:03:20 -0400 Subject: [PATCH 627/770] glsl: Implement ImageGradient and other texture function 
variants --- .../backend/glsl/emit_glsl_image.cpp | 101 ++++++++++++------ .../backend/glsl/emit_glsl_instructions.h | 4 +- 2 files changed, 73 insertions(+), 32 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 3de19cdfe..c62451e23 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -100,7 +100,7 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); + throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; @@ -108,8 +108,12 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (!offset.IsEmpty()) { - ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, - CastToIntVec(ctx.var_alloc.Consume(offset), info), bias); + const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + if (ctx.stage == Stage::Fragment) { + ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); + } else { + ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); + } } else { if (ctx.stage == Stage::Fragment) { ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); @@ -137,10 +141,10 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse [[maybe_unused]] const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); + throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); } if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); + throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; @@ -175,24 +179,32 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { - throw NotImplementedException("Sparse texture samples"); + throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); } if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); + throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); } if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); - } - if (!offset.IsEmpty()) { - throw NotImplementedException("textureLodOffset"); + throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? 
fmt::format(",{}", bias_lc) : ""}; const auto cast{ShadowSamplerVecCast(info.type)}; - if (ctx.stage == Stage::Fragment) { - ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + if (!offset.IsEmpty()) { + const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, + offset_str, bias); + } else { + ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, + dref, offset_str); + } } else { - ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } else { + ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); + } } } @@ -206,22 +218,30 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { - throw NotImplementedException("Sparse texture samples"); + throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); } if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); + throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); } if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); - } - if (!offset.IsEmpty()) { - throw NotImplementedException("textureLodOffset"); + throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; - if (info.type == TextureType::ColorArrayCube) { - ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + if (!offset.IsEmpty()) { + const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, + offset_str); + } else { + ctx.AddF32("{}=textureLodOffset({},vec3({},{}),{},{});", inst, texture, coords, dref, + lod_lc, offset_str); + } } else { - ctx.AddF32("{}=textureLod({},vec3({},{}),{});", inst, texture, coords, dref, lod_lc); + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); + } else { + ctx.AddF32("{}=textureLod({},vec3({},{}),{});", inst, texture, coords, dref, lod_lc); + } } } @@ -316,10 +336,10 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst [[maybe_unused]] std::string_view ms) { const auto info{inst.Flags()}; if (info.has_bias) { - throw NotImplementedException("Bias texture samples"); + throw NotImplementedException("EmitImageFetch Bias texture samples"); } if (info.has_lod_clamp) { - throw NotImplementedException("Lod clamp samples"); + throw NotImplementedException("EmitImageFetch Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; @@ -368,7 +388,7 @@ void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused] "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, lod, texture); case TextureType::Buffer: - throw NotImplementedException("Texture buffers"); + throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); } throw LogicError("Unspecified image type {}", 
info.type.Value()); } @@ -384,10 +404,31 @@ void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view derivates, - [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view lod_clamp) { - NotImplemented(); + [[maybe_unused]] const IR::Value& derivatives, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& lod_clamp) { + const auto info{inst.Flags()}; + if (info.has_lod_clamp) { + throw NotImplementedException("EmitImageGradient Lod clamp samples"); + } + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageGradient Sparse"); + } + if (!offset.IsEmpty()) { + throw NotImplementedException("EmitImageGradient offset"); + } + const auto texture{Texture(ctx, info, index)}; + const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; + const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; + const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; + if (multi_component) { + ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } else { + ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, + derivatives_vec, derivatives_vec); + } } void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 24c36bbda..e7009d8e9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -631,8 +631,8 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp); + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, const IR::Value& lod_clamp); void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, From 8c684b3e2327bc7b0c02f2a22dbf52c11884ecd3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 31 May 2021 23:07:13 -0400 Subject: [PATCH 628/770] glsl: Implement tessellation shaders --- .../backend/glsl/emit_context.cpp | 90 ++++++++++++++++--- .../glsl/emit_glsl_context_get_set.cpp | 69 +++++++++++++- .../backend/glsl/emit_glsl_floating_point.cpp | 2 +- .../backend/glsl/emit_glsl_instructions.h | 2 +- .../glsl/emit_glsl_not_implemented.cpp | 10 +-- 5 files changed, 146 insertions(+), 27 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 923060386..01403ca17 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -21,10 +21,21 @@ std::string_view InterpDecorator(Interpolation interp) { throw InvalidArgument("Invalid interpolation {}", interp); } 
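// Ahead of the tessellation changes below, a sketch of the declarations they
// produce (counts and locations illustrative): per-vertex inputs of the
// geometry and tessellation stages become unsized arrays,
//   layout(location=0) in vec4 in_attr0[];
// tessellation-control outputs are sized by the invocation count,
//   layout(location=0) out vec4 out_attr0[4];
// and the stage headers gain fixed-function layout qualifiers, e.g.
//   layout(vertices=4)out;                 // tessellation control
//   layout(triangles,equal_spacing,cw)in;  // tessellation evaluation
// with the mode, spacing and winding taken from the runtime info.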
-std::string_view ArrayDecorator(Stage stage) { +std::string_view InputArrayDecorator(Stage stage) { switch (stage) { case Stage::Geometry: - return "[1]"; + case Stage::TessellationControl: + case Stage::TessellationEval: + return "[]"; + default: + return ""; + } +} + +std::string OutputDecorator(Stage stage, u32 size) { + switch (stage) { + case Stage::TessellationControl: + return fmt::format("[{}]", size); default: return ""; } @@ -73,6 +84,30 @@ std::string_view SamplerType(TextureType type, bool is_depth) { } } +std::string_view GetTessMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Triangles: + return "triangles"; + case TessPrimitive::Quads: + return "quads"; + case TessPrimitive::Isolines: + return "isolines"; + } + throw InvalidArgument("Invalid tessellation primitive {}", primitive); +} + +std::string_view GetTessSpacing(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return "equal_spacing"; + case TessSpacing::FractionalOdd: + return "fractional_odd_spacing"; + case TessSpacing::FractionalEven: + return "fractional_even_spacing"; + } + throw InvalidArgument("Invalid tessellation spacing {}", spacing); +} + std::string_view InputPrimitive(InputTopology topology) { switch (topology) { case InputTopology::Points: @@ -100,6 +135,23 @@ std::string_view OutputPrimitive(OutputTopology topology) { } throw InvalidArgument("Invalid output topology {}", topology); } + +void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { + if (stage != Stage::VertexA && stage != Stage::VertexB && stage != Stage::Geometry) { + return; + } + header += "out gl_PerVertex{"; + if (info.stores_position) { + header += "vec4 gl_Position;"; + } + if (info.stores_point_size) { + header += "float gl_PointSize;"; + } + if (info.stores_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "};"; +} } // namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -111,17 +163,20 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile case Stage::VertexA: case Stage::VertexB: stage_name = "vs"; - // TODO: add only what's used by the shader - header += - "out gl_PerVertex {vec4 gl_Position;float gl_PointSize;float gl_ClipDistance[];};"; break; case Stage::TessellationControl: + stage_name = "tsc"; + header += fmt::format("layout(vertices={})out;\n", program.invocations); + break; case Stage::TessellationEval: - stage_name = "ts"; + stage_name = "tse"; + header += fmt::format("layout({},{},{})in;\n", GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? 
"cw" : "ccw"); break; case Stage::Geometry: stage_name = "gs"; - header += fmt::format("layout({})in;layout({}, max_vertices={})out;\n", + header += fmt::format("layout({})in;layout({},max_vertices={})out;\n", InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; @@ -135,12 +190,23 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } + SetupOutPerVertex(stage, info, header); for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { - header += - fmt::format("layout(location={}){} in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, ArrayDecorator(stage)); + header += fmt::format("layout(location={}){} in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, + InputArrayDecorator(stage)); + } + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + if (stage == Stage::TessellationControl) { + header += fmt::format("layout(location={})patch out vec4 patch{};", index, index); + } else { + header += fmt::format("layout(location={})patch in vec4 patch{};", index, index); } } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { @@ -151,8 +217,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - const auto declaration{ - fmt::format("layout(location={}) out vec4 out_attr{};", index, index)}; + const auto declaration{fmt::format("layout(location={}) out vec4 out_attr{}{};", index, + index, OutputDecorator(stage, program.invocations))}; if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { header += declaration; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 28e89a0a6..5c56477bf 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -25,9 +25,24 @@ bool IsInputArray(Stage stage) { stage == Stage::TessellationEval; } -std::string VertexIndex(EmitContext& ctx, std::string_view vertex) { +std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; } + +bool IsOutputArray(Stage stage) { + return stage == Stage::Geometry || stage == Stage::TessellationControl; +} + +std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { + switch (ctx.stage) { + case Stage::Geometry: + return fmt::format("[{}]", vertex); + case Stage::TessellationControl: + return "[gl_InvocationID]"; + default: + return ""; + } +} } // namespace void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -132,12 +147,12 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding } void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, - [[maybe_unused]] std::string_view vertex) { + std::string_view vertex) { const u32 element{static_cast(attr) % 4}; const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.AddF32("{}=in_attr{}{}.{};", inst, index, VertexIndex(ctx, vertex), swizzle); + ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } switch (attr) { @@ -150,6 +165,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case Stage::VertexB: ctx.AddF32("{}=gl_Position.{};", inst, swizzle); break; + case Stage::TessellationEval: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; + case Stage::TessellationControl: case Stage::Geometry: ctx.AddF32("{}=gl_in[{}].gl_Position.{};", inst, vertex, swizzle); break; @@ -173,6 +192,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::FrontFace: ctx.AddF32("{}=intBitsToFloat(gl_FrontFacing?-1:0);", inst); break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; default: fmt::print("Get attribute {}", attr); throw NotImplementedException("Get attribute {}", attr); @@ -185,7 +208,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.Add("out_attr{}.{}={};", index, swizzle, value); + ctx.Add("out_attr{}{}.{}={};", index, OutputVertexIndex(ctx, vertex), swizzle, value); return; } switch (attr) { @@ -219,6 +242,44 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } } +void EmitGetPatch([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + const char swizzle{"xyzw"[element]}; + ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const u32 element{IR::GenericPatchElement(patch)}; + ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + ctx.Add("gl_TessLevelOuter[{}]={};", index, value); + break; + } + case IR::Patch::TessellationLodInteriorU: + 
ctx.Add("gl_TessLevelInner[0]={};", value); + break; + case IR::Patch::TessellationLodInteriorV: + ctx.Add("gl_TessLevelInner[1]={};", value); + break; + default: + throw NotImplementedException("Patch {}", patch); + } +} + void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { const char swizzle{"xyzw"[component]}; ctx.Add("frag_color{}.{}={};", index, swizzle, value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 49ab182ea..f4b81407a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -161,7 +161,7 @@ void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { void EmitFPRecipSqrt32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - ctx.AddF32("{}=(1.0f)/sqrt({});", inst, value); + ctx.AddF32("{}=inversesqrt({});", inst, value); } void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index e7009d8e9..89ded3614 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -75,7 +75,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, std::string_view vertex); -void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); void EmitSetSampleMask(EmitContext& ctx, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 3ed4e04d3..cf7b2a51e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -151,14 +151,6 @@ void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::str NotImplemented(); } -void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { - NotImplemented(); -} - -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { - NotImplemented(); -} - void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -204,7 +196,7 @@ void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { } void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); + ctx.AddU32("{}=uint(gl_InvocationID);", inst); } void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { From 59576b82a8c06943e6b9fafbff6ed1884a4132a7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 1 Jun 2021 00:07:14 -0400 Subject: [PATCH 629/770] glsl: Fix precise variable declaration and add some more separation in the shader for better debugability when dumped --- .../backend/glsl/emit_context.cpp | 3 +- .../backend/glsl/emit_glsl.cpp | 12 +++-- .../backend/glsl/var_alloc.cpp | 48 
+++++++++---------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 01403ca17..2375b7a06 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -150,7 +150,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { if (info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - header += "};"; + header += "};\n"; } } // namespace @@ -223,6 +223,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += declaration; } } + header += "\n"; DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); SetupImages(bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index f467d978c..bfc42e1b4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -180,13 +180,15 @@ void DefineVariables(const EmitContext& ctx, std::string& header) { const auto type{static_cast(i)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto type_name{ctx.var_alloc.GetGlslType(type)}; + const auto precise{ + (type == GlslVarType::PrecF32 || type == GlslVarType::PrecF64) ? "precise " : ""}; // Temps/return types that are never used are stored at index 0 if (tracker.uses_temp) { - header += fmt::format("{}{}={}(0);", type_name, ctx.var_alloc.Representation(0, type), - type_name); + header += fmt::format("{}{} {}={}(0);", precise, type_name, + ctx.var_alloc.Representation(0, type), type_name); } for (u32 index = 1; index <= tracker.num_used; ++index) { - header += fmt::format("{}{}={}(0);", type_name, + header += fmt::format("{}{} {}={}(0);", precise, type_name, ctx.var_alloc.Representation(index, type), type_name); } } @@ -198,7 +200,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(program); EmitCode(ctx, program); - const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; + const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.local_memory_size > 0) { ctx.header += fmt::format("uint lmem[{}];", program.local_memory_size / 4); @@ -206,7 +208,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR if (program.shared_memory_size > 0) { ctx.header += fmt::format("shared uint smem[{}];", program.shared_memory_size / 4); } - ctx.header += "void main(){\n"; + ctx.header += "\nvoid main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; // TODO: Properly resolve attribute issues diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 2ecdec7f2..1ab64add4 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -202,37 +202,35 @@ GlslVarType VarAlloc::RegType(IR::Type type) const { std::string VarAlloc::GetGlslType(GlslVarType type) const { switch (type) { case GlslVarType::U1: - return "bool "; + return "bool"; case GlslVarType::F16x2: - return "f16vec2 "; + return "f16vec2"; case GlslVarType::U32: - return "uint "; + return "uint"; case GlslVarType::S32: - return 
"int "; + return "int"; case GlslVarType::F32: - return "float "; - case GlslVarType::S64: - return "int64_t "; - case GlslVarType::U64: - return "uint64_t "; - case GlslVarType::F64: - return "double "; - case GlslVarType::U32x2: - return "uvec2 "; - case GlslVarType::F32x2: - return "vec2 "; - case GlslVarType::U32x3: - return "uvec3 "; - case GlslVarType::F32x3: - return "vec3 "; - case GlslVarType::U32x4: - return "uvec4 "; - case GlslVarType::F32x4: - return "vec4 "; case GlslVarType::PrecF32: - return "precise float "; + return "float"; + case GlslVarType::S64: + return "int64_t"; + case GlslVarType::U64: + return "uint64_t"; + case GlslVarType::F64: case GlslVarType::PrecF64: - return "precise double "; + return "double"; + case GlslVarType::U32x2: + return "uvec2"; + case GlslVarType::F32x2: + return "vec2"; + case GlslVarType::U32x3: + return "uvec3"; + case GlslVarType::F32x3: + return "vec3"; + case GlslVarType::U32x4: + return "uvec4"; + case GlslVarType::F32x4: + return "vec4"; case GlslVarType::Void: return ""; default: From fc29de7d5b75e93996a33546119c415cc9966327 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 31 May 2021 23:33:30 -0400 Subject: [PATCH 630/770] emit_glsl_context_get_set: Remove unused function --- .../backend/glsl/emit_glsl_context_get_set.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 5c56477bf..0cf31329d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -29,10 +29,6 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? 
fmt::format("[{}]", vertex) : ""; } -bool IsOutputArray(Stage stage) { - return stage == Stage::Geometry || stage == Stage::TessellationControl; -} - std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { switch (ctx.stage) { case Stage::Geometry: From 9f3970f837eebf290c03988ef00dee68ca68f07b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 1 Jun 2021 01:04:32 -0400 Subject: [PATCH 631/770] glsl: Add gl_ViewportIndex out attribute --- src/shader_recompiler/backend/glsl/emit_context.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 2375b7a06..6e164b958 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -151,6 +151,9 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { header += "float gl_ClipDistance[];"; } header += "};\n"; + if (info.stores_viewport_index) { + header += "out int gl_ViewportIndex;"; + } } } // namespace @@ -264,7 +267,6 @@ void EmitContext::SetupExtensions(std::string&) { } if (info.stores_viewport_index) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; - header += "#extension GL_NV_viewport_array2 : enable\n"; } } From 31147ffe69882141cb83bf83d5e01890524ab85a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 1 Jun 2021 01:49:24 -0400 Subject: [PATCH 632/770] glsl: Yet another gl_ViewportIndex fix attempt --- .../backend/glsl/emit_context.cpp | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6e164b958..6f10002fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -32,6 +32,19 @@ std::string_view InputArrayDecorator(Stage stage) { } } +bool StoresPerVertexAttributes(Stage stage) { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + case Stage::TessellationControl: + case Stage::TessellationEval: + return true; + default: + return false; + } +} + std::string OutputDecorator(Stage stage, u32 size) { switch (stage) { case Stage::TessellationControl: @@ -137,7 +150,7 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { - if (stage != Stage::VertexA && stage != Stage::VertexB && stage != Stage::Geometry) { + if (!StoresPerVertexAttributes(stage)) { return; } header += "out gl_PerVertex{"; @@ -150,8 +163,11 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { if (info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } + if (info.stores_viewport_index && stage != Stage::Geometry) { + header += "int gl_ViewportIndex;"; + } header += "};\n"; - if (info.stores_viewport_index) { + if (info.stores_viewport_index && stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } @@ -265,7 +281,7 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index) { + if (info.stores_viewport_index && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } } From f4799e8fa15b92d8d5607dc5dfca4974901ee06c Mon Sep 17 
00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:33:03 -0400 Subject: [PATCH 633/770] glsl: Implement transform feedback --- .../backend/glsl/emit_context.cpp | 53 +++++++++++++++---- .../backend/glsl/emit_context.h | 8 +++ .../glsl/emit_glsl_context_get_set.cpp | 15 ++++-- .../renderer_opengl/gl_shader_cache.cpp | 18 +++++-- 4 files changed, 76 insertions(+), 18 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6f10002fe..58355d5e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -37,7 +37,6 @@ bool StoresPerVertexAttributes(Stage stage) { case Stage::VertexA: case Stage::VertexB: case Stage::Geometry: - case Stage::TessellationControl: case Stage::TessellationEval: return true; default: @@ -154,9 +153,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { return; } header += "out gl_PerVertex{"; - if (info.stores_position) { - header += "vec4 gl_Position;"; - } + header += "vec4 gl_Position;"; if (info.stores_point_size) { header += "float gl_PointSize;"; } @@ -236,10 +233,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - const auto declaration{fmt::format("layout(location={}) out vec4 out_attr{}{};", index, - index, OutputDecorator(stage, program.invocations))}; if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { - header += declaration; + DefineGenericOutput(index, program.invocations); } } header += "\n"; @@ -312,13 +307,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { } } +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? 
"float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } + header += "\n"; +} + void EmitContext::DefineHelperFunctions() { if (info.uses_global_increment || info.uses_shared_increment) { header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } if (info.uses_global_decrement || info.uses_shared_decrement) { - header += - "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; + header += "uint CasDecrement(uint op_a,uint " + "op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } if (info.uses_atomic_f32_add) { header += "uint CasFloatAdd(uint op_a,float op_b){return " diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 48786a2c7..5d48675e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -30,6 +30,12 @@ struct Program; namespace Shader::Backend::GLSL { +struct GenericElementInfo { + std::string name{}; + u32 first_element{}; + u32 num_components{}; +}; + class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -149,6 +155,7 @@ public: std::vector image_buffer_bindings; std::vector texture_bindings; std::vector image_bindings; + std::array, 32> output_generics{}; bool uses_y_direction{}; bool uses_cc_carry{}; @@ -157,6 +164,7 @@ private: void SetupExtensions(std::string& header); void DefineConstantBuffers(Bindings& bindings); void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); void SetupImages(Bindings& bindings); }; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0cf31329d..c48492a17 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -200,13 +200,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, [[maybe_unused]] std::string_view vertex) { - const u32 element{static_cast(attr) % 4}; - const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.Add("out_attr{}{}.{}={};", index, OutputVertexIndex(ctx, vertex), swizzle, value); + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + const auto output_decorator{OutputVertexIndex(ctx, vertex)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } return; } + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; switch (attr) { case IR::Attribute::PointSize: ctx.Add("gl_PointSize={};", value); @@ -233,7 +241,6 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, 
std::string_view val break; } default: - fmt::print("Set attribute {}", attr); throw NotImplementedException("Set attribute {}", attr); } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cd11ff653..0a1ba363b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -92,9 +92,15 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, - bool glasm_use_storage_buffers) { + bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; case Shader::Stage::TessellationEval: info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { @@ -420,7 +426,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array assembly_programs; Shader::Backend::Bindings binding; - if (!device.UseAssemblyShaders()) { + const bool use_glasm{device.UseAssemblyShaders()}; + if (!use_glasm) { source_program.handle = glCreateProgram(); } const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; @@ -434,8 +441,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; - if (device.UseAssemblyShaders()) { + const auto runtime_info{ + MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + if (use_glasm) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { @@ -443,7 +451,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( AttachShader(Stage(stage_index), source_program.handle, code); } } - if (!device.UseAssemblyShaders()) { + if (!use_glasm) { LinkProgram(source_program.handle); } return std::make_unique( From 6577a63d368afa57d5f29df40e524af30eaabffa Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:48:49 -0400 Subject: [PATCH 634/770] glsl: skip gl_ViewportIndex write if device does not support it --- .../backend/glsl/emit_context.cpp | 18 ++++++++++-------- .../backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 58355d5e3..846d38bfc 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,23 +148,24 @@ std::string_view OutputPrimitive(OutputTopology topology) { throw InvalidArgument("Invalid output topology {}", topology); } -void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { - if (!StoresPerVertexAttributes(stage)) { +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { return; } header += "out gl_PerVertex{"; header += "vec4 gl_Position;"; - if (info.stores_point_size) 
{ + if (ctx.info.stores_point_size) { header += "float gl_PointSize;"; } - if (info.stores_clip_distance) { + if (ctx.info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.supports_viewport_layer && + ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } header += "};\n"; - if (info.stores_viewport_index && stage == Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } @@ -173,6 +174,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + supports_viewport_layer = profile.support_gl_vertex_viewport_layer; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -206,7 +208,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } - SetupOutPerVertex(stage, info, header); + SetupOutPerVertex(*this, header); for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { @@ -276,7 +278,7 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (info.stores_viewport_index && supports_viewport_layer && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 5d48675e6..26a76f8a3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -159,6 +159,7 @@ public: bool uses_y_direction{}; bool uses_cc_carry{}; + bool supports_viewport_layer{}; private: void SetupExtensions(std::string& header); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c48492a17..ebaf50abd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -226,6 +226,11 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_Position.{}={};", swizzle, value); break; case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && !ctx.supports_viewport_layer) { + // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport + // layer extension"); + break; + } ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value); break; case IR::Attribute::ClipDistance0: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 5d269368a..420117132 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -85,6 +85,7 @@ struct Profile { bool support_derivative_control{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; + bool support_gl_vertex_viewport_layer{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a1ba363b..77681594a 100644 
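For reference, a hand-written sketch of the per-vertex header SetupOutPerVertex builds after this change, for a vertex shader that writes position, point size, clip distances and the viewport index on a device exposing GL_ARB_shader_viewport_layer_array (pretty-printed here; the emitter concatenates it without line breaks):

    #extension GL_ARB_shader_viewport_layer_array : enable
    out gl_PerVertex{
        vec4 gl_Position;
        float gl_PointSize;
        float gl_ClipDistance[];
        int gl_ViewportIndex;
    };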
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,6 +225,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), From af9696059cc24e07fba2920814725e56c3c61df0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 20:37:24 -0400 Subject: [PATCH 635/770] glsl: Implement Images --- .../backend/glsl/emit_context.cpp | 50 +++++++++++++++++-- .../backend/glsl/emit_glsl_image.cpp | 33 ++++++++++-- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 846d38bfc..5048c8b68 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -91,11 +91,42 @@ std::string_view SamplerType(TextureType type, bool is_depth) { case TextureType::Buffer: return "samplerBuffer"; default: - fmt::print("Texture type: {}", type); throw NotImplementedException("Texture type: {}", type); } } +std::string_view ImageType(TextureType type) { + switch (type) { + case TextureType::Color2D: + return "uimage2D"; + default: + throw NotImplementedException("Image type: {}", type); + } +} + +std::string_view ImageFormatString(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return ""; + case ImageFormat::R8_UINT: + return ",r8ui"; + case ImageFormat::R8_SINT: + return ",r8i"; + case ImageFormat::R16_UINT: + return ",r16ui"; + case ImageFormat::R16_SINT: + return ",r16i"; + case ImageFormat::R32_UINT: + return ",r32ui"; + case ImageFormat::R32G32_UINT: + return ",rg32ui"; + case ImageFormat::R32G32B32A32_UINT: + return ",rgba32ui"; + default: + throw NotImplementedException("Image format: {}", format); + } +} + std::string_view GetTessMode(TessPrimitive primitive) { switch (primitive) { case TessPrimitive::Triangles: @@ -250,6 +281,7 @@ void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n"; header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -396,15 +428,25 @@ void EmitContext::DefineHelperFunctions() { void EmitContext::SetupImages(Bindings& bindings) { image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { - throw NotImplementedException("image_buffer_descriptors"); + const auto indices{bindings.image + desc.count}; + for (u32 index = bindings.image; index < indices; ++index) { + header += fmt::format("layout(binding={}) uniform uimageBuffer img{};", bindings.image, + index); + } image_buffer_bindings.push_back(bindings.image); bindings.image += desc.count; } image_bindings.reserve(info.image_descriptors.size()); for (const auto& desc : info.image_descriptors) { - throw NotImplementedException("image_bindings"); - image_bindings.push_back(bindings.image); + const auto format{ImageFormatString(desc.format)}; + const auto 
image_type{ImageType(desc.type)}; + const auto qualifier{desc.is_written ? "" : "readonly "}; + const auto indices{bindings.image + desc.count}; + for (u32 index = bindings.image; index < indices; ++index) { + header += fmt::format("layout(binding={}{})uniform {}{} img{};", bindings.image, format, + qualifier, image_type, index); + } bindings.image += desc.count; } texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index c62451e23..8c54f0fb3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -14,15 +14,25 @@ namespace { std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, [[maybe_unused]] const IR::Value& index) { if (info.type == TextureType::Buffer) { - throw NotImplementedException("TextureType::Buffer"); + return fmt::format("tex{}", ctx.texture_buffer_bindings.at(info.descriptor_index)); } else { return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); } } +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, + [[maybe_unused]] const IR::Value& index) { + if (info.type == TextureType::Buffer) { + return fmt::format("img{}", ctx.image_buffer_bindings.at(info.descriptor_index)); + } else { + return fmt::format("img{}", ctx.image_bindings.at(info.descriptor_index)); + } +} + std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { switch (info.type) { case TextureType::Color1D: + case TextureType::Buffer: return fmt::format("int({})", value); case TextureType::ColorArray1D: case TextureType::Color2D: @@ -41,6 +51,7 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInfo& info) { switch (info.type) { case TextureType::Color1D: + case TextureType::Buffer: return fmt::format("int({})", value); case TextureType::ColorArray1D: case TextureType::Color2D: @@ -349,8 +360,12 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, TexelFetchCastToInt(coords, info), lod, TexelFetchCastToInt(offset, info)); } else { - ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, - TexelFetchCastToInt(coords, info), lod); + if (info.type == TextureType::Buffer) { + ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); + } else { + ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, + TexelFetchCastToInt(coords, info), lod); + } } return; } @@ -434,14 +449,22 @@ void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords) { - NotImplemented(); + const auto info{inst.Flags()}; + const auto sparse_inst{PrepareSparse(inst)}; + if (sparse_inst) { + throw NotImplementedException("EmitImageRead Sparse"); + } + const auto image{Image(ctx, info, index)}; + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); } void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, [[maybe_unused]] std::string_view coords, [[maybe_unused]] std::string_view color) { - NotImplemented(); + const auto info{inst.Flags()}; 
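To illustrate the strings SetupImages and these image handlers produce, here is a minimal fragment shader assembled by hand; the binding numbers and the r32ui format are hypothetical values chosen only for the example:

    #version 460
    #extension GL_EXT_shader_image_load_formatted : enable
    layout(binding=0)uniform uimageBuffer img0;
    layout(binding=1,r32ui)uniform readonly uimage2D img1;
    void main(){
        // imageLoad result is widened to uvec4, as EmitImageRead does
        uvec4 t=uvec4(imageLoad(img1,ivec2(gl_FragCoord.xy)));
        // buffer coordinates are cast to int, as TexelFetchCastToInt does
        imageStore(img0,int(gl_FragCoord.x),t);
    }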
+ const auto image{Image(ctx, info, index)}; + ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } void EmitBindlessImageSampleImplicitLod(EmitContext&) { From 8d8ce24f20649be639dbb3cc0f3edc90c6a6481e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 19:15:36 -0400 Subject: [PATCH 636/770] glsl: Implement Load/WriteGlobal along with some other misc changes and fixes --- .../backend/glsl/emit_context.cpp | 86 ++++++++++++++++++- .../backend/glsl/emit_glsl_atomic.cpp | 2 +- .../glsl/emit_glsl_bitwise_conversion.cpp | 4 +- .../glsl/emit_glsl_context_get_set.cpp | 57 ++++++------ .../backend/glsl/emit_glsl_image.cpp | 10 ++- .../backend/glsl/emit_glsl_instructions.h | 6 +- .../backend/glsl/emit_glsl_memory.cpp | 56 ++++++++++++ .../glsl/emit_glsl_not_implemented.cpp | 56 ------------ .../backend/glsl/var_alloc.cpp | 6 +- 9 files changed, 185 insertions(+), 98 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 5048c8b68..f68f33212 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -9,6 +9,14 @@ namespace Shader::Backend::GLSL { namespace { +u32 CbufIndex(u32 offset) { + return (offset / 4) % 4; +} + +char OffsetSwizzle(u32 offset) { + return "xyzw"[CbufIndex(offset)]; +} + std::string_view InterpDecorator(Interpolation interp) { switch (interp) { case Interpolation::Smooth: @@ -382,6 +390,8 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { } void EmitContext::DefineHelperFunctions() { + header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; if (info.uses_global_increment || info.uses_shared_increment) { header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } @@ -391,7 +401,7 @@ void EmitContext::DefineHelperFunctions() { } if (info.uses_atomic_f32_add) { header += "uint CasFloatAdd(uint op_a,float op_b){return " - "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; + "ftou(utof(op_a)+op_b);}\n"; } if (info.uses_atomic_f32x2_add) { header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " @@ -423,6 +433,80 @@ void EmitContext::DefineHelperFunctions() { if (info.uses_atomic_s32_max) { header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } + if (info.uses_global_memory) { + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){\n"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){\n"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){\n"}; + + std::string load_func{"uint LoadGlobal32(uint64_t addr){\n"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){\n"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){\n"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + const auto cbuf_value{fmt::format( + "uint64_t {}=packUint2x32(uvec2(ftou({}[{}].{}),ftou({}[{}].{})));", ssbo_addr, + cbuf, ssbo.cbuf_offset / 16, 
OffsetSwizzle(ssbo.cbuf_offset), cbuf, + (ssbo.cbuf_offset + 4) / 16, OffsetSwizzle(ssbo.cbuf_offset + 4))}; + + write_func += cbuf_value; + write_func_64 += cbuf_value; + write_func_128 += cbuf_value; + load_func += cbuf_value; + load_func_64 += cbuf_value; + load_func_128 += cbuf_value; + const auto ssbo_size{fmt::format("ftou({}[{}].{}),ftou({}[{}].{})", cbuf, + size_cbuf_offset / 16, OffsetSwizzle(size_cbuf_offset), + cbuf, (size_cbuf_offset + 4) / 16, + OffsetSwizzle(size_cbuf_offset + 4))}; + const auto comparison{fmt::format("if((addr>={})&&(addr<({}+\nuint64_t(uvec2({}))))){{", + ssbo_addr, ssbo_addr, ssbo_size)}; + write_func += comparison; + write_func_64 += comparison; + write_func_128 += comparison; + load_func += comparison; + load_func_64 += comparison; + load_func_128 += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + write_func += fmt::format("{}[uint(addr-{})>>2]=data;return;}}", ssbo_name, ssbo_addr); + write_func_64 += + fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;return;}}", + ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); + write_func_128 += + fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;{}[uint(" + "addr-{}+8)>>2]=data.z;{}[uint(addr-{}+12)>>2]=data.w;return;}}", + ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, + ssbo_name, ssbo_addr); + load_func += fmt::format("return {}[uint(addr-{})>>2];}}", ssbo_name, ssbo_addr); + load_func_64 += + fmt::format("return uvec2({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2]);}}", + ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); + load_func_128 += fmt::format("return " + "uvec4({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2],{}[" + "uint(addr-{}+8)>>2],{}[uint(addr-{}+12)>>2]);}}", + ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name, + ssbo_addr, ssbo_name, ssbo_addr); + } + write_func += "}\n"; + write_func_64 += "}\n"; + write_func_128 += "}\n"; + load_func += "return 0u;}\n"; + load_func_64 += "return uvec2(0);}\n"; + load_func_128 += "return uvec4(0);}\n"; + header += write_func; + header += write_func_64; + header += write_func_128; + header += load_func; + header += load_func_64; + header += load_func_128; + } } void EmitContext::SetupImages(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 5394f4a8c..f8d2c12db 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -39,7 +39,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi ctx.var_alloc.Consume(offset))}; const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); - ctx.AddF32("{}=uintBitsToFloat({});", inst, ret); + ctx.AddF32("{}=utof({});", inst, ret); } } // namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 1e860f11a..0e617c8d8 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -40,7 +40,7 @@ void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=floatBitsToUint({});", inst, value); + ctx.AddU32("{}=ftou({});", inst, value); } void 
EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { @@ -52,7 +52,7 @@ void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=uintBitsToFloat({});", inst, value); + ctx.AddF32("{}=utof({});", inst, value); } void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index ebaf50abd..19b51a813 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -45,14 +45,13 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),8);", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, + binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32( - "{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } } @@ -60,14 +59,13 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),8);", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, + binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32( - "{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, + ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } } @@ -75,12 +73,12 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),16);", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, + binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" + ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" "2)%2)*16),16);", inst, ctx.stage_name, 
binding.U32(), offset_var, offset_var, offset_var); } @@ -90,12 +88,12 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),16);", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, + binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" + ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" "2)%2)*16),16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); } @@ -104,12 +102,12 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), + ctx.AddU32("{}=ftou({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32())); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, - binding.U32(), offset_var, offset_var); + ctx.AddU32("{}=ftou({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, binding.U32(), + offset_var, offset_var); } } @@ -128,15 +126,14 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { if (offset.IsImmediate()) { - ctx.AddU32x2( - "{}=uvec2(floatBitsToUint({}_cbuf{}[{}].{}),floatBitsToUint({}_cbuf{}[{}].{}));", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, - OffsetSwizzle(offset.U32() + 4)); + ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}].{}),ftou({}_cbuf{}[{}].{}));", inst, + ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), + ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, + OffsetSwizzle(offset.U32() + 4)); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/" - "4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", + ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}/16][({}/" + "4)%4]),ftou({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -180,13 +177,13 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); break; case IR::Attribute::InstanceId: - ctx.AddF32("{}=intBitsToFloat(gl_InstanceID);", inst); + ctx.AddF32("{}=itof(gl_InstanceID);", inst); break; case IR::Attribute::VertexId: - ctx.AddF32("{}=intBitsToFloat(gl_VertexID);", inst); + ctx.AddF32("{}=itof(gl_VertexID);", inst); break; case IR::Attribute::FrontFace: - ctx.AddF32("{}=intBitsToFloat(gl_FrontFacing?-1:0);", inst); + ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); break; case 
IR::Attribute::TessellationEvaluationPointU: case IR::Attribute::TessellationEvaluationPointV: @@ -231,7 +228,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val // layer extension"); break; } - ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value); + ctx.Add("gl_ViewportIndex=ftoi({});", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 8c54f0fb3..37ddd57d3 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -212,7 +212,11 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, } } else { if (ctx.stage == Stage::Fragment) { - ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + if (info.type == TextureType::ColorArrayCube) { + ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); + } else { + ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); + } } else { ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); } @@ -238,6 +242,7 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; + const auto cast{ShadowSamplerVecCast(info.type)}; if (!offset.IsEmpty()) { const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; if (info.type == TextureType::ColorArrayCube) { @@ -251,7 +256,8 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); } else { - ctx.AddF32("{}=textureLod({},vec3({},{}),{});", inst, texture, coords, dref, lod_lc); + ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, + lod_lc); } } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 89ded3614..90dcfcef7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -105,9 +105,9 @@ void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); void EmitLoadGlobalS16(EmitContext& ctx); -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); void EmitWriteGlobalU8(EmitContext& ctx); void EmitWriteGlobalS8(EmitContext& ctx); void EmitWriteGlobalU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index a4411b68b..bc4363824 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -9,6 +9,62 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +void 
EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS8([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU16([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + ctx.AddU32("{}=LoadGlobal32({});", inst, address); +} + +void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); +} + +void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { + ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); +} + +void EmitWriteGlobalU8([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS8([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalU16([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + ctx.Add("WriteGlobal32({},{});", address, value); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + ctx.Add("WriteGlobal64({},{});", address, value); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + ctx.Add("WriteGlobal128({},{});", address, value); +} + void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index cf7b2a51e..cac803146 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -232,62 +232,6 @@ void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { NotImplemented(); } -void EmitLoadGlobalU8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalS8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalU16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobalS16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { - NotImplemented(); -} - -void EmitWriteGlobalU8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalS8(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalU16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobalS16(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { - NotImplemented(); -} - -void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { - NotImplemented(); -} - -void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 1ab64add4..0ae56651e 100644 
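The ftou/utof style aliases used above are #define'd into every shader header by DefineHelperFunctions, which is what lets FormatFloat (in the var_alloc diff that follows) spell special float literals compactly. A sketch of the prelude and the literals it enables, with illustrative variable names:

    #define ftoi floatBitsToInt
    #define ftou floatBitsToUint
    #define itof intBitsToFloat
    #define utof uintBitsToFloat
    // e.g. FormatFloat's quiet-NaN and +inf bit patterns:
    float nan_lit=utof(0x7fc00000);
    float inf_lit=utof(0x7f800000);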
--- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -58,13 +58,13 @@ std::string FormatFloat(std::string_view value, IR::Type type) { // TODO: Confirm FP64 nan/inf if (type == IR::Type::F32) { if (value == "nan") { - return "uintBitsToFloat(0x7fc00000)"; + return "utof(0x7fc00000)"; } if (value == "inf") { - return "uintBitsToFloat(0x7f800000)"; + return "utof(0x7f800000)"; } if (value == "-inf") { - return "uintBitsToFloat(0xff800000)"; + return "utof(0xff800000)"; } } if (value.find_first_of('e') != std::string_view::npos) { From a68fabf6d5847e36bfa72edc149a8c7420868583 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 19:16:25 -0400 Subject: [PATCH 637/770] glsl: Increase NUM_VARS that can be allocated; needed for HW:AoC. --- src/shader_recompiler/backend/glsl/var_alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h index be21a87ea..ed936f8dc 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.h +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -57,7 +57,7 @@ static_assert(sizeof(Id) == sizeof(u32)); class VarAlloc { public: - static constexpr size_t NUM_VARS = 511; + static constexpr size_t NUM_VARS = 1023; struct UseTracker { size_t num_used{}; std::bitset var_use{}; From 5355568a2dbbb5bc4122109e2bd04ce6903adff1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 20:24:56 -0400 Subject: [PATCH 638/770] glsl: Refactor Global memory functions --- .../backend/glsl/emit_context.cpp | 149 +++++++++--------- .../backend/glsl/emit_context.h | 1 + 2 files changed, 76 insertions(+), 74 deletions(-) diff --git
OffsetSwizzle(ssbo.cbuf_offset), cbuf, - (ssbo.cbuf_offset + 4) / 16, OffsetSwizzle(ssbo.cbuf_offset + 4))}; - - write_func += cbuf_value; - write_func_64 += cbuf_value; - write_func_128 += cbuf_value; - load_func += cbuf_value; - load_func_64 += cbuf_value; - load_func_128 += cbuf_value; - const auto ssbo_size{fmt::format("ftou({}[{}].{}),ftou({}[{}].{})", cbuf, - size_cbuf_offset / 16, OffsetSwizzle(size_cbuf_offset), - cbuf, (size_cbuf_offset + 4) / 16, - OffsetSwizzle(size_cbuf_offset + 4))}; - const auto comparison{fmt::format("if((addr>={})&&(addr<({}+\nuint64_t(uvec2({}))))){{", - ssbo_addr, ssbo_addr, ssbo_size)}; - write_func += comparison; - write_func_64 += comparison; - write_func_128 += comparison; - load_func += comparison; - load_func_64 += comparison; - load_func_128 += comparison; - - const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; - write_func += fmt::format("{}[uint(addr-{})>>2]=data;return;}}", ssbo_name, ssbo_addr); - write_func_64 += - fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;return;}}", - ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); - write_func_128 += - fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;{}[uint(" - "addr-{}+8)>>2]=data.z;{}[uint(addr-{}+12)>>2]=data.w;return;}}", - ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, - ssbo_name, ssbo_addr); - load_func += fmt::format("return {}[uint(addr-{})>>2];}}", ssbo_name, ssbo_addr); - load_func_64 += - fmt::format("return uvec2({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2]);}}", - ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); - load_func_128 += fmt::format("return " - "uvec4({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2],{}[" - "uint(addr-{}+8)>>2],{}[uint(addr-{}+12)>>2]);}}", - ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name, - ssbo_addr, ssbo_name, ssbo_addr); - } - write_func += "}\n"; - write_func_64 += "}\n"; - write_func_128 += "}\n"; - load_func += "return 0u;}\n"; - load_func_64 += "return uvec2(0);}\n"; - load_func_128 += "return uvec4(0);}\n"; - header += write_func; - header += write_func_64; - header += write_func_128; - header += load_func; - header += load_func_64; - header += load_func_128; + header += DefineGlobalMemoryFunctions(); } } +std::string EmitContext::DefineGlobalMemoryFunctions() { + const auto define_body{[&](std::string& func, size_t index, u32 num_components, + std::string_view return_statement) { + const auto& ssbo{info.storage_buffers_descriptors[index]}; + const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; + const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; + const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; + std::array addr_xy; + std::array size_xy; + for (size_t i = 0; i < addr_xy.size(); ++i) { + const auto addr_loc{ssbo.cbuf_offset + 4 * i}; + const auto size_loc{size_cbuf_offset + 4 * i}; + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, CbufSwizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, CbufSwizzle(size_loc)); + } + const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; + const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; + func += addr_statment; + + const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; + const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; + const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; + const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; + 
func += comparison; + + const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; + switch (num_components) { + case 1: + func += fmt::format(return_statement, ssbo_name, ssbo_addr); + break; + case 2: + func += fmt::format(return_statement, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); + break; + case 4: + func += fmt::format(return_statement, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, + ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); + break; + } + }}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){\n"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){\n"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){\n"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){\n"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){\n"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){\n"}; + const size_t num_buffers{info.storage_buffers_descriptors.size()}; + for (size_t index = 0; index < num_buffers; ++index) { + if (!info.nvn_buffer_used[index]) { + continue; + } + define_body(write_func, index, 1, "{}[uint(addr-{})>>2]=data;return;}}"); + define_body(write_func_64, index, 2, + "{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, 4, + "{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;{}[uint(" + "addr-{}+8)>>2]=data.z;{}[uint(addr-{}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, 1, "return {}[uint(addr-{})>>2];}}"); + define_body(load_func_64, index, 2, + "return uvec2({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2]);}}"); + define_body(load_func_128, index, 4, + "return uvec4({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2],{}[" + "uint(addr-{}+8)>>2],{}[uint(addr-{}+12)>>2]);}}"); + } + write_func += "}"; + write_func_64 += "}"; + write_func_128 += "}"; + load_func += "return 0u;}"; + load_func_64 += "return uvec2(0);}"; + load_func_128 += "return uvec4(0);}"; + return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; +} + void EmitContext::SetupImages(Bindings& bindings) { image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 26a76f8a3..daca1b6f9 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -167,6 +167,7 @@ private: void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); + std::string DefineGlobalMemoryFunctions(); void SetupImages(Bindings& bindings); }; From 34fdb6471d6050b438fd53a0406aedbf6b690600 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 20:57:52 -0400 Subject: [PATCH 639/770] glsl: Cleanup and address feedback --- .../backend/glsl/emit_context.cpp | 73 +++++++++---------- .../backend/glsl/emit_context.h | 4 +- .../backend/glsl/emit_glsl.cpp | 15 ++-- .../backend/glsl/emit_glsl_atomic.cpp | 10 +-- .../glsl/emit_glsl_bitwise_conversion.cpp | 2 +- .../backend/glsl/emit_glsl_composite.cpp | 5 +- .../glsl/emit_glsl_context_get_set.cpp | 30 ++++---- .../backend/glsl/emit_glsl_floating_point.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 2 +- .../glsl/emit_glsl_not_implemented.cpp | 12 +-- 10 files changed, 69 insertions(+), 86 deletions(-) diff --git 
a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index fbc4b9c0f..ae5ac752d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -22,9 +22,9 @@ std::string_view InterpDecorator(Interpolation interp) { case Interpolation::Smooth: return ""; case Interpolation::Flat: - return "flat"; + return "flat "; case Interpolation::NoPerspective: - return "noperspective"; + return "noperspective "; } throw InvalidArgument("Invalid interpolation {}", interp); } @@ -77,7 +77,6 @@ std::string_view SamplerType(TextureType type, bool is_depth) { case TextureType::ColorArrayCube: return "samplerCubeArrayShadow"; default: - fmt::print("Texture type: {}", type); throw NotImplementedException("Texture type: {}", type); } } @@ -191,29 +190,27 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { if (!StoresPerVertexAttributes(ctx.stage)) { return; } - header += "out gl_PerVertex{"; - header += "vec4 gl_Position;"; + header += "out gl_PerVertex{vec4 gl_Position;"; if (ctx.info.stores_point_size) { header += "float gl_PointSize;"; } if (ctx.info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.supports_viewport_layer && + if (ctx.info.stores_viewport_index && ctx.profile.support_gl_vertex_viewport_layer && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } - header += "};\n"; + header += "};"; if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } -} // namespace +} // Anonymous namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - supports_viewport_layer = profile.support_gl_vertex_viewport_layer; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -222,18 +219,18 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile stage_name = "vs"; break; case Stage::TessellationControl: - stage_name = "tsc"; - header += fmt::format("layout(vertices={})out;\n", program.invocations); + stage_name = "tcs"; + header += fmt::format("layout(vertices={})out;", program.invocations); break; case Stage::TessellationEval: - stage_name = "tse"; - header += fmt::format("layout({},{},{})in;\n", GetTessMode(runtime_info.tess_primitive), + stage_name = "tes"; + header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), GetTessSpacing(runtime_info.tess_spacing), runtime_info.tess_clockwise ? 
"cw" : "ccw"); break; case Stage::Geometry: stage_name = "gs"; - header += fmt::format("layout({})in;layout({},max_vertices={})out;\n", + header += fmt::format("layout({})in;layout({},max_vertices={})out;", InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; @@ -242,7 +239,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Compute: stage_name = "cs"; - header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", + header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); break; @@ -251,7 +248,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { - header += fmt::format("layout(location={}){} in vec4 in_attr{}{};", index, + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); } @@ -260,11 +257,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (!info.uses_patches[index]) { continue; } - if (stage == Stage::TessellationControl) { - header += fmt::format("layout(location={})patch out vec4 patch{};", index, index); - } else { - header += fmt::format("layout(location={})patch in vec4 patch{};", index, index); - } + const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; + header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { if (!info.stores_frag_color[index]) { @@ -278,18 +272,18 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineGenericOutput(index, program.invocations); } } - header += "\n"; DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); SetupImages(bindings); + SetupTextures(bindings); DefineHelperFunctions(); } void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage - header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_EXT_texture_shadow_lod : enable\n"; - header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + header += "#extension GL_ARB_sparse_texture2 : enable\n" + "#extension GL_EXT_texture_shadow_lod : enable\n" + "#extension GL_EXT_shader_image_load_formatted : enable\n"; if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -312,13 +306,14 @@ void EmitContext::SetupExtensions(std::string&) { } if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || info.uses_subgroup_shuffles || info.uses_fswzadd) { - header += "#extension GL_ARB_shader_ballot : enable\n"; - header += "#extension GL_ARB_shader_group_vote : enable\n"; + header += "#extension GL_ARB_shader_ballot : enable\n" + "#extension GL_ARB_shader_group_vote : enable\n"; if (!info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index && supports_viewport_layer && stage != Stage::Geometry) { + if (info.stores_viewport_index && profile.support_gl_vertex_viewport_layer && + stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } } @@ -386,46 +381,45 @@ void 
EmitContext::DefineGenericOutput(size_t index, u32 invocations) { std::fill_n(output_generics[index].begin() + element, num_components, element_info); element += num_components; } - header += "\n"; } void EmitContext::DefineHelperFunctions() { header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; if (info.uses_global_increment || info.uses_shared_increment) { - header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; + header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; } if (info.uses_global_decrement || info.uses_shared_decrement) { header += "uint CasDecrement(uint op_a,uint " - "op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; + "op_b){return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; } if (info.uses_atomic_f32_add) { header += "uint CasFloatAdd(uint op_a,float op_b){return " - "ftou(utof(op_a)+op_b);}\n"; + "ftou(utof(op_a)+op_b);}"; } if (info.uses_atomic_f32x2_add) { header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; + "packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; } if (info.uses_atomic_f32x2_min) { header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; + "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; } if (info.uses_atomic_f32x2_max) { header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; + "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; } if (info.uses_atomic_f16x2_add) { header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; + "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; } if (info.uses_atomic_f16x2_min) { header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; + "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; } if (info.uses_atomic_f16x2_max) { header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " - "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; + "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; } if (info.uses_atomic_s32_min) { header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; @@ -534,6 +528,9 @@ void EmitContext::SetupImages(Bindings& bindings) { } bindings.image += desc.count; } +} + +void EmitContext::SetupTextures(Bindings& bindings) { texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { texture_buffer_bindings.push_back(bindings.texture); diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index daca1b6f9..9bdca184f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -31,7 +31,7 @@ struct Program; namespace Shader::Backend::GLSL { struct GenericElementInfo { - std::string name{}; + std::string name; u32 first_element{}; u32 num_components{}; }; @@ -159,7 +159,6 @@ public: bool uses_y_direction{}; bool uses_cc_carry{}; - bool supports_viewport_layer{}; private: void SetupExtensions(std::string& header); @@ -169,6 +168,7 @@ private: void DefineHelperFunctions(); std::string DefineGlobalMemoryFunctions(); void SetupImages(Bindings& bindings); + void SetupTextures(Bindings& bindings); }; } // namespace Shader::Backend::GLSL diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index bfc42e1b4..7b57c1e91 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -83,7 +83,6 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - // ctx.Add("/* $ {} $ */", inst->GetOpcode()); switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ @@ -134,7 +133,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } break; case IR::AbstractSyntaxNode::Type::If: - ctx.Add("if ({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); + ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); break; case IR::AbstractSyntaxNode::Type::EndIf: ctx.Add("}}"); @@ -156,12 +155,10 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.repeat.cond)); - ctx.Add("continue;\n}}else{{"); - ctx.Add("break;\n}}\n}}"); + ctx.Add("if({}){{continue;}}else{{break;}}}}", + ctx.var_alloc.Consume(node.data.repeat.cond)); break; default: - fmt::print("{}", node.type); throw NotImplementedException("AbstractSyntaxNode::Type {}", node.type); break; } @@ -200,7 +197,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(program); EmitCode(ctx, program); - const std::string version{fmt::format("#version 460{}\n", GlslVersionSpecifier(ctx))}; + const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.local_memory_size > 0) { ctx.header += fmt::format("uint lmem[{}];", program.local_memory_size / 4); @@ -225,10 +222,8 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR if (program.info.uses_subgroup_shuffles) { ctx.header += "bool shfl_in_bounds;"; } - ctx.header += "\n"; ctx.code.insert(0, ctx.header); - ctx.code += "}"; - // fmt::print("\n{}\n", ctx.code); + ctx.code += '}'; return ctx.code; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index f8d2c12db..5ba39261b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -11,7 +11,7 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view cas_loop{R"(for (;;){{ +constexpr const char cas_loop[]{R"(for (;;){{ uint old_value={}; {}=atomicCompSwap({},old_value,{}({},{})); if ({}==old_value){{break;}} @@ -21,7 +21,7 @@ void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset std::string_view value, std::string_view function) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string smem{fmt::format("smem[{}>>2]", offset)}; - ctx.Add(cas_loop.data(), smem, ret, smem, function, smem, value, ret); + ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); } void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -29,7 +29,7 @@ void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset))}; - 
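// [Editor's note] Once instantiated, the cas_loop template above expands to a GLSL
// compare-and-swap retry loop. A sketch for a 32-bit SSBO atomic using the CasFloatAdd
// helper (vs_ssbo0, off, t1 and v are illustrative names):
//   for (;;){
//       uint old_value=vs_ssbo0[off>>2];
//       t1=atomicCompSwap(vs_ssbo0[off>>2],old_value,CasFloatAdd(vs_ssbo0[off>>2],v));
//       if (t1==old_value){break;}
//   }
// atomicCompSwap only commits the new value while the word still equals old_value, so
// the loop retries until the read-modify-write completes without interference.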
ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); } void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -38,10 +38,10 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset))}; const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret); + ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); ctx.AddF32("{}=utof({});", inst, ret); } -} // namespace +} // Anonymous namespace void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 0e617c8d8..eff672cc4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -10,7 +10,7 @@ namespace Shader::Backend::GLSL { namespace { -static void Alias(IR::Inst& inst, const IR::Value& value) { +void Alias(IR::Inst& inst, const IR::Value& value) { if (value.IsImmediate()) { return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 3697e1a34..954fc67b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -10,13 +10,14 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view SWIZZLE{"xyzw"}; +constexpr std::string_view SWIZZLE{"xyzw"}; void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, std::string_view object, u32 index) { ctx.Add("{}={};", result, composite); ctx.Add("{}.{}={};", result, SWIZZLE[index], object); } -} // namespace +} // Anonymous namespace + void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, std::string_view e2) { ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 19b51a813..d986e1b1a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -39,11 +40,10 @@ std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { return ""; } } -} // namespace +} // Anonymous namespace -void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { if (offset.IsImmediate()) { ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), @@ -55,9 +55,8 @@ void EmitGetCbufU8([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::Inst& } } -void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { if (offset.IsImmediate()) { ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), @@ -69,9 +68,8 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& } } -void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { if (offset.IsImmediate()) { ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), @@ -84,9 +82,8 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst } } -void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { if (offset.IsImmediate()) { ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), @@ -196,7 +193,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - [[maybe_unused]] std::string_view vertex) { + std::string_view vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; @@ -223,7 +220,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_Position.{}={};", swizzle, value); break; case IR::Attribute::ViewportIndex: - if (ctx.stage != Stage::Geometry && !ctx.supports_viewport_layer) { + if (ctx.stage != Stage::Geometry && !ctx.profile.support_gl_vertex_viewport_layer) { // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport // layer extension"); break; @@ -247,8 +244,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } } -void EmitGetPatch([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] IR::Patch patch) { +void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Non-generic patch load"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index f4b81407a..adeafdd3d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -25,7 +25,7 @@ void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string bool Precise(IR::Inst& inst) { return {inst.Flags<IR::FpControl>().no_contraction}; } -} // namespace +} // Anonymous namespace void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { diff
--git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 37ddd57d3..ce3a82656 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -102,7 +102,7 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } return sparse_inst; } -} // namespace +} // Anonymous namespace void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] const IR::Value& index, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index cac803146..c64d4325d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -25,9 +25,7 @@ void EmitPhi(EmitContext& ctx, IR::Inst& phi) { } } -void EmitVoid(EmitContext& ctx) { - // NotImplemented(); -} +void EmitVoid(EmitContext& ctx) {} void EmitReference(EmitContext& ctx, const IR::Value& value) { ctx.var_alloc.Consume(value); @@ -94,13 +92,9 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) { NotImplemented(); } -void EmitPrologue(EmitContext& ctx) { - // NotImplemented(); -} +void EmitPrologue(EmitContext& ctx) {} -void EmitEpilogue(EmitContext& ctx) { - // NotImplemented(); -} +void EmitEpilogue(EmitContext& ctx) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); From 0a0b0a73d82057a309b6b0427c29c7e15e2b356f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 22:25:06 -0400 Subject: [PATCH 640/770] glsl: Fix <32-bit SSBO writes and more cleanup --- .../backend/glsl/emit_context.cpp | 52 +++++++------------ .../backend/glsl/emit_glsl_atomic.cpp | 7 +-- .../glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_memory.cpp | 32 +++++++----- 4 files changed, 43 insertions(+), 50 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ae5ac752d..ecc7335ba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -433,8 +433,7 @@ void EmitContext::DefineHelperFunctions() { } std::string EmitContext::DefineGlobalMemoryFunctions() { - const auto define_body{[&](std::string& func, size_t index, u32 num_components, - std::string_view return_statement) { + const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { const auto& ssbo{info.storage_buffers_descriptors[index]}; const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; @@ -458,42 +457,31 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { func += comparison; const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; - switch (num_components) { - case 1: - func += fmt::format(return_statement, ssbo_name, ssbo_addr); - break; - case 2: - func += fmt::format(return_statement, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); - break; - case 4: - func += fmt::format(return_statement, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, - ssbo_name, ssbo_addr, ssbo_name, ssbo_addr); - break; - } + func += fmt::format(return_statement, ssbo_name, ssbo_addr); }}; - std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){\n"}; - std::string write_func_64{"void 
WriteGlobal64(uint64_t addr,uvec2 data){\n"}; - std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){\n"}; - std::string load_func{"uint LoadGlobal32(uint64_t addr){\n"}; - std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){\n"}; - std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){\n"}; + std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; + std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; + std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; + std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; + std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; + std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; const size_t num_buffers{info.storage_buffers_descriptors.size()}; for (size_t index = 0; index < num_buffers; ++index) { if (!info.nvn_buffer_used[index]) { continue; } - define_body(write_func, index, 1, "{}[uint(addr-{})>>2]=data;return;}}"); - define_body(write_func_64, index, 2, - "{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;return;}}"); - define_body(write_func_128, index, 4, - "{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;{}[uint(" - "addr-{}+8)>>2]=data.z;{}[uint(addr-{}+12)>>2]=data.w;return;}}"); - define_body(load_func, index, 1, "return {}[uint(addr-{})>>2];}}"); - define_body(load_func_64, index, 2, - "return uvec2({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2]);}}"); - define_body(load_func_128, index, 4, - "return uvec4({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2],{}[" - "uint(addr-{}+8)>>2],{}[uint(addr-{}+12)>>2]);}}"); + define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); + define_body(write_func_64, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); + define_body(write_func_128, index, + "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" + "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); + define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); + define_body(load_func_64, index, + "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); + define_body(load_func_128, index, + "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" + "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); } write_func += "}"; write_func_64 += "}"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 5ba39261b..1568bb7cf 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -11,11 +11,8 @@ namespace Shader::Backend::GLSL { namespace { -constexpr const char cas_loop[]{R"(for (;;){{ - uint old_value={}; - {}=atomicCompSwap({},old_value,{}({},{})); - if ({}==old_value){{break;}} -}})"}; +constexpr char cas_loop[]{ + "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, std::string_view value, std::string_view function) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index d986e1b1a..b2caa222a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -11,7 +11,7 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr 
std::string_view SWIZZLE{"xyzw"}; +constexpr char SWIZZLE[]{"xyzw"}; u32 CbufIndex(u32 offset) { return (offset / 4) % 4; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index bc4363824..500e5c290 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -9,6 +9,18 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, + std::string_view value, std::string_view bit_offset, u32 num_bits) { + const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; + ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); +} +} // Anonymous namespace + void EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) { NotImplemented(); } @@ -125,9 +137,8 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, - binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, - offset_var); + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); } void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, @@ -135,9 +146,8 @@ void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int({}%4)*8,8);", ctx.stage_name, - binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, value, - offset_var); + const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); } void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, @@ -145,9 +155,8 @@ void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", - ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, - value, offset_var); + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); } void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, @@ -155,9 +164,8 @@ void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}_ssbo{}[{}>>2]=bitfieldInsert({}_ssbo{}[{}>>2],{},int(({}>>1)%2)*16,16);", - ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var, - value, offset_var); + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; + SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); } void EmitWriteStorage32(EmitContext& ctx, const 
IR::Value& binding, const IR::Value& offset, From d12f2b8ccf74671224c6f8f90873d74f35625762 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 3 Jun 2021 23:18:38 -0400 Subject: [PATCH 641/770] emit_glsl_image: Use immediate offsets when possible --- .../backend/glsl/emit_glsl_image.cpp | 45 ++++++++++++++----- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index ce3a82656..a62e2b181 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -78,6 +78,28 @@ std::string ShadowSamplerVecCast(TextureType type) { } } +std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { + if (offset.IsImmediate()) { + return fmt::format("int({})", offset.U32()); + } + IR::Inst* const inst{offset.InstRecursive()}; + if (inst->AreAllArgsImmediates()) { + switch (inst->GetOpcode()) { + case IR::Opcode::CompositeConstructU32x2: + return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); + case IR::Opcode::CompositeConstructU32x3: + return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32()); + case IR::Opcode::CompositeConstructU32x4: + return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), + inst->Arg(2).U32(), inst->Arg(3).U32()); + default: + break; + } + } + return ctx.var_alloc.Consume(offset); +} + std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { @@ -119,7 +141,7 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { if (!offset.IsEmpty()) { - const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); } else { @@ -137,8 +159,7 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", - *sparse_inst, texture, coords, CastToIntVec(ctx.var_alloc.Consume(offset), info), - texel, bias); + *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); } else { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, texture, coords, texel, bias); @@ -163,7 +184,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if (!sparse_inst) { if (!offset.IsEmpty()) { ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, - CastToIntVec(ctx.var_alloc.Consume(offset), info)); + GetOffsetVec(ctx, offset)); } else { ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); } @@ -173,7 +194,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, - CastToIntVec(ctx.var_alloc.Consume(offset), info), texel); + GetOffsetVec(ctx, offset), texel); } else { 
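// [Editor's note] GetOffsetVec above folds a CompositeConstruct whose arguments are all
// immediates into an ivec literal. This matters because core GLSL requires the offset
// argument of the textureOffset/textureLodOffset family to be a constant expression;
// lowering a known-constant IR vector to, e.g.,
//   textureLodOffset(tex,coords,lod,ivec2(1,-2));
// keeps such call sites valid, while genuinely dynamic offsets still go through
// ctx.var_alloc.Consume(offset). (tex/coords/lod and the offset values are illustrative.)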
ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, texture, coords, lod_lc, texel); @@ -202,7 +223,7 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto cast{ShadowSamplerVecCast(info.type)}; if (!offset.IsEmpty()) { - const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, offset_str, bias); @@ -244,7 +265,7 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, const auto texture{Texture(ctx, info, index)}; const auto cast{ShadowSamplerVecCast(info.type)}; if (!offset.IsEmpty()) { - const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)}; + const auto offset_str{GetOffsetVec(ctx, offset)}; if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, offset_str); @@ -279,7 +300,7 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins } if (offset2.IsEmpty()) { ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, - CastToIntVec(ctx.var_alloc.Consume(offset), info), info.gather_component); + GetOffsetVec(ctx, offset), info.gather_component); return; } // PTP @@ -295,8 +316,8 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins } if (offset2.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", - *sparse_inst, texture, CastToIntVec(coords, info), - CastToIntVec(ctx.var_alloc.Consume(offset), info), texel, info.gather_component); + *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), + texel, info.gather_component); } // PTP const auto offsets{PtpOffsets(offset, offset2)}; @@ -322,7 +343,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: } if (offset2.IsEmpty()) { ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, - CastToIntVec(ctx.var_alloc.Consume(offset), info)); + GetOffsetVec(ctx, offset)); return; } // PTP @@ -338,7 +359,7 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: if (offset2.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), dref, - CastToIntVec(ctx.var_alloc.Consume(offset), info), texel); + GetOffsetVec(ctx, offset), texel); } // PTP const auto offsets{PtpOffsets(offset, offset2)}; From 747b8556a4611791c1b0afbb500c77de57adfc54 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 00:46:46 -0400 Subject: [PATCH 642/770] glsl: Use textureGrad fallback when EXT_texture_shadow_lod is unsupported --- .../backend/glsl/emit_context.cpp | 4 +- .../backend/glsl/emit_glsl_image.cpp | 44 ++++++++++++++++--- src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ecc7335ba..76cf0bdf0 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -282,8 +282,10 @@ 
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n" - "#extension GL_EXT_texture_shadow_lod : enable\n" "#extension GL_EXT_shader_image_load_formatted : enable\n"; + if (profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index a62e2b181..6cf0300ab 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -67,14 +68,14 @@ std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInf } } -std::string ShadowSamplerVecCast(TextureType type) { +bool NeedsShadowLodExt(TextureType type) { switch (type) { case TextureType::ColorArray2D: case TextureType::ColorCube: case TextureType::ColorArrayCube: - return "vec4"; + return true; default: - return "vec3"; + return false; } } @@ -221,7 +222,22 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -263,15 +279,29 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? 
"vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, offset_str); } else { - ctx.AddF32("{}=textureLodOffset({},vec3({},{}),{},{});", inst, texture, coords, dref, - lod_lc, offset_str); + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); } } else { if (info.type == TextureType::ColorArrayCube) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 420117132..3bbd5a531 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_vertex_viewport_layer{}; + bool support_gl_texture_shadow_lod{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 77681594a..b4c634d29 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), From 35e78d558d7c4ecc4f5aeaaee6e5521d39e9b9b6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 02:05:04 -0400 Subject: [PATCH 643/770] glsl: Add cbuf access workaround for devices with component indexing bug --- .../glsl/emit_glsl_context_get_set.cpp | 161 ++++++++++++------ src/shader_recompiler/profile.h | 2 + 2 files changed, 112 insertions(+), 51 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index b2caa222a..83ce6fcbb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -44,95 +44,154 @@ std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, - binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - (offset.U32() % 4) * 8); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, - ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + 
ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, + offset_var, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int(({}%4)*8),8);", + cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); } } void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name, - binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - (offset.U32() % 4) * 8); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst, - ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, + offset_var, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int(({}%4)*8),8);", + cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); } } void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, - binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - ((offset.U32() / 2) % 2) * 16); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" - "2)%2)*16),16);", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, + cbuf, offset_var, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int((({}>>1)%2)*16),16);", + cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); } } void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - 
ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name, - binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - ((offset.U32() / 2) % 2) * 16); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/" - "2)%2)*16),16);", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var); + ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, + cbuf, offset_var, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int((({}>>1)%2)*16),16);", + cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); } } void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32("{}=ftou({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(), - offset.U32() / 16, OffsetSwizzle(offset.U32())); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32("{}=ftou({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, binding.U32(), - offset_var, offset_var); + ctx.AddU32("{}=ftou({}[{}].{});", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32())); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32("{}=ftou({}[{}>>4][({}>>2)%4]);", inst, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=ftou({}[{}>>4].{});", cbuf_offset, swizzle, ret, cbuf, offset_var, + "xyzw"[swizzle]); } } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddF32("{}={}_cbuf{}[{}].{};", inst, ctx.stage_name, binding.U32(), offset.U32() / 16, - OffsetSwizzle(offset.U32())); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddF32("{}={}_cbuf{}[{}/16][({}>>2)%4];", inst, ctx.stage_name, binding.U32(), - offset_var, offset_var); + ctx.AddF32("{}={}[{}].{};", inst, cbuf, offset.U32() / 16, OffsetSwizzle(offset.U32())); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddF32("{}={}[{}>>4][({}>>2)%4];", inst, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}={}[{}>>4].{};", cbuf_offset, swizzle, ret, cbuf, offset_var, + "xyzw"[swizzle]); } } void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& 
offset) { + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}].{}),ftou({}_cbuf{}[{}].{}));", inst, - ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()), - ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16, + ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, offset.U32() / 16, + OffsetSwizzle(offset.U32()), cbuf, (offset.U32() + 4) / 16, OffsetSwizzle(offset.U32() + 4)); - } else { - const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}/16][({}/" - "4)%4]),ftou({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));", - inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name, - binding.U32(), offset_var, offset_var); + return; + } + const auto offset_var{ctx.var_alloc.Consume(offset)}; + if (!ctx.profile.has_gl_component_indexing_bug) { + ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", + inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, + swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, + "xyzw"[(swizzle + 1) % 4]); } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3bbd5a531..bc61a911f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -101,6 +101,8 @@ struct Profile { bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; + /// Dynamic vec4 indexing is broken on some OpenGL drivers + bool has_gl_component_indexing_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; }; From d41aef03c74e15fb8927bbae741c099694d14e79 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 13:24:34 -0400 Subject: [PATCH 644/770] glsl: Fix image gather logic --- src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 6cf0300ab..f022c5f30 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -343,11 +343,13 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", *sparse_inst, texture, coords, texel, info.gather_component); + return; } if (offset2.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), texel, info.gather_component); + return; } // PTP const auto offsets{PtpOffsets(offset, offset2)}; @@ -385,11 +387,13 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, texture, coords, dref, texel); + return; } if (offset2.IsEmpty()) { 
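// [Editor's note] The returns added by this patch matter because these branches used to
// fall through: a gather carrying a single immediate offset emitted its
// sparseTextureGatherOffsetARB call and then also emitted the PTP (per-texel-position)
// variant, clobbering the result, roughly:
//   t0=sparseTextureGatherOffsetARB(...); // intended
//   t0=sparseTextureGatherOffsetsARB(...); // unintended second write, now unreachable
// With the early returns, exactly one of the three forms (no offset, single offset,
// PTP offsets) is emitted per instruction. (t0 and the elided arguments are
// illustrative.)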
ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},,{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), dref, GetOffsetVec(ctx, offset), texel); + return; } // PTP const auto offsets{PtpOffsets(offset, offset2)}; From 421847cf1e33d5b95c9aa272bf3cf69afda3d964 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 5 Jun 2021 02:41:29 -0400 Subject: [PATCH 645/770] glsl: Implement image atomics and set layer along with some more cleanup/oversight fixes --- .../backend/glsl/emit_context.cpp | 15 +- .../glsl/emit_glsl_context_get_set.cpp | 12 +- .../backend/glsl/emit_glsl_image.cpp | 188 +++++++++++++++++- .../glsl/emit_glsl_not_implemented.cpp | 143 ------------- src/shader_recompiler/profile.h | 1 - 5 files changed, 204 insertions(+), 155 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 76cf0bdf0..50a7a7447 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -197,7 +197,7 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { if (ctx.info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_gl_vertex_viewport_layer && + if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } @@ -314,7 +314,7 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index && profile.support_gl_vertex_viewport_layer && + if (info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } @@ -497,12 +497,13 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { void EmitContext::SetupImages(Bindings& bindings) { image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { - const auto indices{bindings.image + desc.count}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform uimageBuffer img{};", bindings.image, - index); - } image_buffer_bindings.push_back(bindings.image); + const auto indices{bindings.image + desc.count}; + const auto format{ImageFormatString(desc.format)}; + for (u32 index = bindings.image; index < indices; ++index) { + header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{};", + bindings.image, format, index); + } bindings.image += desc.count; } image_bindings.reserve(info.image_descriptors.size()); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 83ce6fcbb..4d35be152 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -269,6 +269,15 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val const u32 element{static_cast(attr) % 4}; const char swizzle{"xyzw"[element]}; switch (attr) { + case IR::Attribute::Layer: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + // LOG_WARNING(..., "Shader stores viewport layer but device does not support viewport + // layer 
extension"); + break; + } + ctx.Add("gl_Layer=ftoi({});", value); + break; case IR::Attribute::PointSize: ctx.Add("gl_PointSize={};", value); break; @@ -279,7 +288,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_Position.{}={};", swizzle, value); break; case IR::Attribute::ViewportIndex: - if (ctx.stage != Stage::Geometry && !ctx.profile.support_gl_vertex_viewport_layer) { + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport // layer extension"); break; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index f022c5f30..e3a69e3a5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -45,7 +45,7 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info case TextureType::ColorArrayCube: return fmt::format("ivec4({})", value); default: - throw NotImplementedException("Offset type {}", info.type.Value()); + throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); } } @@ -64,7 +64,7 @@ std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInf case TextureType::ColorArrayCube: return fmt::format("ivec4({})", value); default: - throw NotImplementedException("Offset type {}", info.type.Value()); + throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); } } @@ -98,7 +98,19 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { break; } } - return ctx.var_alloc.Consume(offset); + const auto offset_str{ctx.var_alloc.Consume(offset)}; + switch (offset.Type()) { + case IR::Type::U32: + return fmt::format("int({})", offset_str); + case IR::Type::U32x2: + return fmt::format("ivec2({})", offset_str); + case IR::Type::U32x3: + return fmt::format("ivec3({})", offset_str); + case IR::Type::U32x4: + return fmt::format("ivec4({})", offset_str); + default: + throw NotImplementedException("Offset type {}", offset.Type()); + } } std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { @@ -528,6 +540,88 @@ void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, 
std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, + std::string_view) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + value); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + const auto info{inst.Flags()}; + const auto image{Image(ctx, info, index)}; + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + value); +} + void EmitBindlessImageSampleImplicitLod(EmitContext&) { NotImplemented(); } @@ -624,4 +718,92 @@ void EmitBoundImageWrite(EmitContext&) { NotImplemented(); } +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + 
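+    // These bound/bindless image atomic entry points are stubs: the GLSL
+    // backend only lowers image atomics that were resolved to a concrete
+    // descriptor (the EmitImageAtomic* implementations above).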
NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index c64d4325d..5ca73610b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -250,147 +250,4 @@ void EmitGetInBoundsFromOp(EmitContext& ctx) { NotImplemented(); } -void EmitBindlessImageAtomicIAdd32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicSMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicUMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicSMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicUMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicInc32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicDec32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicAnd32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicOr32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicXor32(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageAtomicExchange32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicIAdd32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicSMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicUMin32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicSMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicUMax32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicInc32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicDec32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicAnd32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicOr32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicXor32(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageAtomicExchange32(EmitContext&) { - NotImplemented(); -} - -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const 
IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { - NotImplemented(); -} - } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index bc61a911f..3a4495070 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -85,7 +85,6 @@ struct Profile { bool support_derivative_control{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; - bool support_gl_vertex_viewport_layer{}; bool support_gl_texture_shadow_lod{}; bool warp_size_potentially_larger_than_guest{}; From 465903468e29ee93845d69bf56273a562dd01242 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 15:52:44 -0400 Subject: [PATCH 646/770] glsl: Implement barriers --- .../backend/glsl/emit_glsl_atomic.cpp | 1 - .../backend/glsl/emit_glsl_barriers.cpp | 21 +++++++++++++++++++ .../glsl/emit_glsl_not_implemented.cpp | 12 ----------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 1568bb7cf..7353d5d4e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -1,4 +1,3 @@ - // Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp index e69de29bb..e1d1b558e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
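+//
+// Maps the IR barrier ops onto GLSL builtins: barrier() synchronizes the
+// invocations of a workgroup, groupMemoryBarrier() orders memory accesses at
+// workgroup scope, and memoryBarrier() orders them across the whole device.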
+ +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +void EmitBarrier(EmitContext& ctx) { + ctx.Add("barrier();"); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + ctx.Add("groupMemoryBarrier();"); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + ctx.Add("memoryBarrier();"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 5ca73610b..37e4b453b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -80,18 +80,6 @@ void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_la ctx.Add("discard;"); } -void EmitBarrier(EmitContext& ctx) { - NotImplemented(); -} - -void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { - NotImplemented(); -} - -void EmitDeviceMemoryBarrier(EmitContext& ctx) { - NotImplemented(); -} - void EmitPrologue(EmitContext& ctx) {} void EmitEpilogue(EmitContext& ctx) {} From 258106038e921de6863d3e871a82fb7a0c77f10d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 18:04:01 -0400 Subject: [PATCH 647/770] glsl: Allow dynamic tracking of variable allocation --- .../backend/glsl/emit_glsl.cpp | 17 +++++++--- .../backend/glsl/var_alloc.cpp | 34 +++++++++++-------- .../backend/glsl/var_alloc.h | 5 ++- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 7b57c1e91..b189f6c11 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -172,19 +172,28 @@ std::string GlslVersionSpecifier(const EmitContext& ctx) { return ""; } +bool IsPreciseType(GlslVarType type) { + switch (type) { + case GlslVarType::PrecF32: + case GlslVarType::PrecF64: + return true; + default: + return false; + } +} + void DefineVariables(const EmitContext& ctx, std::string& header) { for (u32 i = 0; i < static_cast(GlslVarType::Void); ++i) { const auto type{static_cast(i)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto type_name{ctx.var_alloc.GetGlslType(type)}; - const auto precise{ - (type == GlslVarType::PrecF32 || type == GlslVarType::PrecF64) ? "precise " : ""}; + const auto precise{IsPreciseType(type) ? 
"precise " : ""}; // Temps/return types that are never used are stored at index 0 if (tracker.uses_temp) { - header += fmt::format("{}{} {}={}(0);", precise, type_name, + header += fmt::format("{}{} t{}={}(0);", precise, type_name, ctx.var_alloc.Representation(0, type), type_name); } - for (u32 index = 1; index <= tracker.num_used; ++index) { + for (u32 index = 0; index < tracker.num_used; ++index) { header += fmt::format("{}{} {}={}(0);", precise, type_name, ctx.var_alloc.Representation(index, type), type_name); } diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 0ae56651e..95e8233e2 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -116,8 +116,8 @@ std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { id.type.Assign(type); GetUseTracker(type).uses_temp = true; inst.SetDefinition(id); + return "t" + Representation(inst.Definition()); } - return Representation(inst.Definition()); } std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { @@ -156,21 +156,27 @@ std::string VarAlloc::GetGlslType(IR::Type type) const { Id VarAlloc::Alloc(GlslVarType type) { auto& use_tracker{GetUseTracker(type)}; - if (use_tracker.num_used < NUM_VARS) { - for (size_t var = 1; var < NUM_VARS; ++var) { - if (use_tracker.var_use[var]) { - continue; - } - use_tracker.num_used = std::max(use_tracker.num_used, var + 1); - use_tracker.var_use[var] = true; - Id ret{}; - ret.is_valid.Assign(1); - ret.type.Assign(type); - ret.index.Assign(static_cast(var)); - return ret; + const auto num_vars{use_tracker.var_use.size()}; + for (size_t var = 0; var < num_vars; ++var) { + if (use_tracker.var_use[var]) { + continue; } + use_tracker.num_used = std::max(use_tracker.num_used, var + 1); + use_tracker.var_use[var] = true; + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast(var)); + return ret; } - throw NotImplementedException("Variable spilling"); + // Allocate a new variable + use_tracker.var_use.push_back(true); + Id ret{}; + ret.is_valid.Assign(1); + ret.type.Assign(type); + ret.index.Assign(static_cast(use_tracker.num_used)); + ++use_tracker.num_used; + return ret; } void VarAlloc::Free(Id id) { diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h index ed936f8dc..ab1d1acbd 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.h +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -57,11 +57,10 @@ static_assert(sizeof(Id) == sizeof(u32)); class VarAlloc { public: - static constexpr size_t NUM_VARS = 1023; struct UseTracker { - size_t num_used{}; - std::bitset var_use{}; bool uses_temp{}; + size_t num_used{}; + std::vector var_use; }; /// Used for explicit usages of variables, may revert to temporaries From fb839061fbe23593455ac0c9e83c0a08831fd5db Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 18:28:42 -0400 Subject: [PATCH 648/770] glsl: Remove output generic indexing for geometry stage --- .../backend/glsl/emit_glsl_context_get_set.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 4d35be152..cfcdd45a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp 
@@ -30,10 +30,8 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; } -std::string OutputVertexIndex(EmitContext& ctx, std::string_view vertex) { +std::string OutputVertexIndex(EmitContext& ctx) { switch (ctx.stage) { - case Stage::Geometry: - return fmt::format("[{}]", vertex); case Stage::TessellationControl: return "[gl_InvocationID]"; default: @@ -252,12 +250,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex) { + [[maybe_unused]] std::string_view vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const u32 element{IR::GenericAttributeElement(attr)}; const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; - const auto output_decorator{OutputVertexIndex(ctx, vertex)}; + const auto output_decorator{OutputVertexIndex(ctx)}; if (info.num_components == 1) { ctx.Add("{}{}={};", info.name, output_decorator, value); } else { From fc0db612abedbba8245f94d03049f65d78819b26 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 18:29:13 -0400 Subject: [PATCH 649/770] glsl: Conditionally use GL_EXT_shader_image_load_formatted Fix for SULD.D --- .../backend/glsl/emit_context.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 50a7a7447..8cbb2ec57 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -206,6 +206,20 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { header += "out int gl_ViewportIndex;"; } } + +bool UsesTyplessImage(const Info& info) { + for (const auto& desc : info.image_buffer_descriptors) { + if (desc.format == ImageFormat::Typeless) { + return true; + } + } + for (const auto& desc : info.image_descriptors) { + if (desc.format == ImageFormat::Typeless) { + return true; + } + } + return false; +} } // Anonymous namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -281,8 +295,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage - header += "#extension GL_ARB_sparse_texture2 : enable\n" - "#extension GL_EXT_shader_image_load_formatted : enable\n"; + header += "#extension GL_ARB_sparse_texture2 : enable\n"; if (profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } @@ -318,6 +331,9 @@ void EmitContext::SetupExtensions(std::string&) { stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } + if (UsesTyplessImage(info)) { + header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { From 970fc39d986c5eefa1c4b61ac89ef7e8c2bf23bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 19:05:11 -0400 Subject: [PATCH 650/770] glsl: Rebase fixes --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_util.cpp | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4c634d29..3d229a78c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,7 +225,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 99cb81819..ac6f33e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader) { +static void LogShader(GLuint shader, std::optional code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,6 +28,9 @@ static void LogShader(GLuint shader) { glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); + if (code.has_value()) { + LOG_INFO(Render_OpenGL, "\n{}", *code); + } } else { LOG_WARNING(Render_OpenGL, "{}", log); } @@ -43,7 +46,7 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { glCompileShader(shader.handle); glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { - LogShader(shader.handle); + LogShader(shader.handle, code); } } From 2a504b4765320882be8b4f2bde5f90262f76fd9f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 19:20:32 -0400 Subject: [PATCH 651/770] glsl: Conditionally add GL_ARB_sparse_texture2 --- src/shader_recompiler/backend/glsl/emit_context.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 8cbb2ec57..d6b3c7aba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -294,8 +294,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } void EmitContext::SetupExtensions(std::string&) { - // TODO: track this usage - header += "#extension GL_ARB_sparse_texture2 : enable\n"; if (profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } @@ -331,6 +329,9 @@ void EmitContext::SetupExtensions(std::string&) { stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } + if (info.uses_sparse_residency) { + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + } if (UsesTyplessImage(info)) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } From c5422041134ed2645e7cd32152e36f9d04c66da3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 20:39:30 -0400 Subject: [PATCH 652/770] glsl: Implement indexed attribute loads --- .../backend/glsl/emit_context.cpp | 42 +++++++++++++++++++ .../backend/glsl/emit_context.h | 1 + 
.../glsl/emit_glsl_context_get_set.cpp | 38 ++++++++--------- .../backend/glsl/emit_glsl_instructions.h | 3 +- .../glsl/emit_glsl_not_implemented.cpp | 9 ---- 5 files changed, 64 insertions(+), 29 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index d6b3c7aba..ed0955da0 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -104,8 +104,22 @@ std::string_view SamplerType(TextureType type, bool is_depth) { std::string_view ImageType(TextureType type) { switch (type) { + case TextureType::Color1D: + return "uimage1D"; + case TextureType::ColorArray1D: + return "uimage1DArray"; case TextureType::Color2D: return "uimage2D"; + case TextureType::ColorArray2D: + return "uimage2DArray"; + case TextureType::Color3D: + return "uimage3D"; + case TextureType::ColorCube: + return "uimageCube"; + case TextureType::ColorArrayCube: + return "uimageCubeArray"; + case TextureType::Buffer: + return "uimageBuffer"; default: throw NotImplementedException("Image type: {}", type); } @@ -250,6 +264,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Fragment: stage_name = "fs"; + position_name = "gl_FragCoord"; break; case Stage::Compute: stage_name = "cs"; @@ -449,6 +464,33 @@ void EmitContext::DefineHelperFunctions() { if (info.uses_global_memory) { header += DefineGlobalMemoryFunctions(); } + if (info.loads_indexed_attributes) { + const bool is_array{stage == Stage::Geometry}; + const auto vertex_arg{is_array ? ",uint vertex" : ""}; + std::string func{ + fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " + "masked_index=uint(base_index)&3u;switch(base_index>>2){{", + vertex_arg)}; + if (info.loads_position) { + func += fmt::format("case {}:", static_cast(IR::Attribute::PositionX) >> 2); + const auto position_idx{is_array ? "gl_in[vertex]." : ""}; + func += fmt::format("return {}{}[masked_index];", position_idx, position_name); + } + const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; + for (u32 i = 0; i < info.input_generics.size(); ++i) { + if (!info.input_generics[i].used) { + continue; + } + const auto vertex_idx{is_array ? 
"[vertex]" : ""}; + func += fmt::format("case {}:", base_attribute_value + i); + func += fmt::format("return in_attr{}{}[masked_index];", i, vertex_idx); + } + func += "default: return 0.0;}}"; + header += func; + } + if (info.stores_indexed_attributes) { + // TODO + } } std::string EmitContext::DefineGlobalMemoryFunctions() { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9bdca184f..dce99586e 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -150,6 +150,7 @@ public: Stage stage{}; std::string_view stage_name = "invalid"; + std::string_view position_name = "gl_Position"; std::vector texture_buffer_bindings; std::vector image_buffer_bindings; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index cfcdd45a2..d09187ea7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -206,26 +206,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - switch (ctx.stage) { - case Stage::VertexA: - case Stage::VertexB: - ctx.AddF32("{}=gl_Position.{};", inst, swizzle); - break; - case Stage::TessellationEval: - ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); - break; - case Stage::TessellationControl: - case Stage::Geometry: - ctx.AddF32("{}=gl_in[{}].gl_Position.{};", inst, vertex, swizzle); - break; - case Stage::Fragment: - ctx.AddF32("{}=gl_FragCoord.{};", inst, swizzle); - break; - default: - throw NotImplementedException("Get Position for stage {}", ctx.stage); - } + case IR::Attribute::PositionW: { + const bool is_array{IsInputArray(ctx.stage)}; + const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""}; + ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); break; + } case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); @@ -311,6 +297,20 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } } +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex) { + const bool is_array{ctx.stage == Stage::Geometry}; + const auto vertex_arg{is_array ? 
fmt::format(",{}", vertex) : ""}; + ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); +} + +void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view value, + [[maybe_unused]] std::string_view vertex) { + NotImplemented(); +} + void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Non-generic patch load"); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 90dcfcef7..12094139f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -72,7 +72,8 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); -void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, + std::string_view vertex); void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, std::string_view vertex); void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 37e4b453b..b292db9d4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -124,15 +124,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { NotImplemented(); } From 8bb8bbf4ae2ef259857efe49436dfd71758ea092 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 01:55:12 -0400 Subject: [PATCH 653/770] glsl: Implement fswzadd and wip nv thread shuffle impl --- .../backend/glsl/emit_context.cpp | 11 ++++++ .../backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_warp.cpp | 36 ++++++++++++++++--- src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ed0955da0..6c2828644 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -306,6 +306,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupImages(bindings); SetupTextures(bindings); DefineHelperFunctions(); + DefineConstants(); } void EmitContext::SetupExtensions(std::string&) { @@ -339,6 +340,9 @@ void EmitContext::SetupExtensions(std::string&) { if (!info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } } if 
(info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { @@ -605,4 +609,11 @@ void EmitContext::SetupTextures(Bindings& bindings) { } } +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index dce99586e..2b0d22ce5 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -167,6 +167,7 @@ private: void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); + void DefineConstants(); std::string DefineGlobalMemoryFunctions(); void SetupImages(Bindings& bindings); void SetupTextures(Bindings& bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 38c49b164..6ced0776c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -35,9 +35,17 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); } + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} } // namespace -void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); } @@ -103,6 +111,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; const auto thread_id{"gl_SubGroupInvocationARB"}; const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; @@ -117,6 +129,10 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; @@ -128,6 +144,10 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, 
std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; @@ -139,6 +159,10 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; @@ -147,10 +171,12 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } -void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, - [[maybe_unused]] std::string_view swizzle) { - NotImplemented(); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a4495070..246995190 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; + bool support_gl_warp_intrinsics{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d229a78c..4fcf4e458 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), From 3f31a547e04b9899f1e572faa7a830aca107033e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 21:22:50 -0400 Subject: [PATCH 654/770] glsl: Implement more attribute getters and setters --- .../backend/glsl/emit_context.cpp | 4 ++ .../glsl/emit_glsl_context_get_set.cpp | 68 +++++++++++++++---- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp 
b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6c2828644..e0d678554 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -239,6 +239,7 @@ bool UsesTyplessImage(const Info& info) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + header += "#pragma optionNV(fastmath off)\n"; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -351,6 +352,9 @@ void EmitContext::SetupExtensions(std::string&) { if (info.uses_sparse_residency) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + header += "#extension GL_NV_viewport_array2 : enable\n"; + } if (UsesTyplessImage(info)) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index d09187ea7..c9a2ceb3d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -203,6 +203,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, return; } switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); + break; case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: @@ -212,10 +215,20 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); break; } + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.AddF32("{}=gl_FrontMaterial.diffuse.{};", inst, swizzle); + break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); + break; case IR::Attribute::InstanceId: ctx.AddF32("{}=itof(gl_InstanceID);", inst); break; @@ -225,10 +238,6 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::FrontFace: ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); - break; default: fmt::print("Get attribute {}", attr); throw NotImplementedException("Get attribute {}", attr); @@ -262,6 +271,23 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val } ctx.Add("gl_Layer=ftoi({});", value); break; + case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && + !ctx.profile.support_viewport_index_layer_non_geometry) { + // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport + // layer extension"); + break; + } + ctx.Add("gl_ViewportIndex=ftoi({});", value); + break; + case IR::Attribute::ViewportMask: + if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { + // LOG_WARNING(..., "Shader stores viewport mask but device does not support viewport + // mask extension"); 
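+            // gl_ViewportMask[] comes from GL_NV_viewport_array2 (enabled in
+            // SetupExtensions above); without the extension the store is
+            // dropped, matching the ViewportIndex handling.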
+ break; + } + ctx.Add("gl_ViewportMask[0]=ftoi({});", value); + break; case IR::Attribute::PointSize: ctx.Add("gl_PointSize={};", value); break; @@ -271,14 +297,32 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::PositionW: ctx.Add("gl_Position.{}={};", swizzle, value); break; - case IR::Attribute::ViewportIndex: - if (ctx.stage != Stage::Geometry && - !ctx.profile.support_viewport_index_layer_non_geometry) { - // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport - // layer extension"); - break; - } - ctx.Add("gl_ViewportIndex=ftoi({});", value) + case IR::Attribute::ColorFrontDiffuseR: + case IR::Attribute::ColorFrontDiffuseG: + case IR::Attribute::ColorFrontDiffuseB: + case IR::Attribute::ColorFrontDiffuseA: + ctx.Add("gl_FrontMaterial.diffuse.{}={};", swizzle, value); + break; + case IR::Attribute::ColorFrontSpecularR: + case IR::Attribute::ColorFrontSpecularG: + case IR::Attribute::ColorFrontSpecularB: + case IR::Attribute::ColorFrontSpecularA: + ctx.Add("gl_FrontMaterial.specular.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackDiffuseR: + case IR::Attribute::ColorBackDiffuseG: + case IR::Attribute::ColorBackDiffuseB: + case IR::Attribute::ColorBackDiffuseA: + ctx.Add("gl_BackMaterial.diffuse.{}={};", swizzle, value); + break; + case IR::Attribute::ColorBackSpecularR: + case IR::Attribute::ColorBackSpecularG: + case IR::Attribute::ColorBackSpecularB: + case IR::Attribute::ColorBackSpecularA: + ctx.Add("gl_BackMaterial.specular.{}={};", swizzle, value); + break; + case IR::Attribute::FogCoordinate: + ctx.Add("gl_FogFragCoord={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: From 6650c4799d42044f087a1ac5cb5e4b1a9e899000 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 23:52:28 -0400 Subject: [PATCH 655/770] gl_rasterizer: Add texture fetch barrier for fragments Fixes flicker seen in XC2 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54696d97d..7513bd071 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -437,7 +437,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { From 14bd73db360c0cec61dd2e211dcde49b2197e425 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 01:16:25 -0400 Subject: [PATCH 656/770] glsl: Enable early fragment tests --- src/shader_recompiler/backend/glsl/emit_context.cpp | 3 +++ .../backend/glsl/emit_glsl_floating_point.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e0d678554..a24fa46c5 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -266,6 +266,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile case Stage::Fragment: stage_name = "fs"; position_name = "gl_FragCoord"; + if (runtime_info.force_early_z) { + header +=
"layout(early_fragment_tests)in;"; + } break; case Stage::Compute: stage_name = "cs"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index adeafdd3d..fbf66015f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -47,17 +47,17 @@ void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { if (Precise(inst)) { - ctx.AddPrecF32("{}=float({})+float({});", inst, a, b); + ctx.AddPrecF32("{}={}+{};", inst, a, b); } else { - ctx.AddF32("{}=float({})+float({});", inst, a, b); + ctx.AddF32("{}={}+{};", inst, a, b); } } void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { if (Precise(inst)) { - ctx.AddPrecF64("{}=double({})+double({});", inst, a, b); + ctx.AddPrecF64("{}={}+{};", inst, a, b); } else { - ctx.AddF64("{}=double({})+double({});", inst, a, b); + ctx.AddF64("{}={}+{};", inst, a, b); } } From a92669523483eba151d07f7e9655fc5f25af7fcd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 01:55:36 -0400 Subject: [PATCH 657/770] glsl: Use existing tracking for enabling EXT_shader_image_load_formatted --- .../backend/glsl/emit_context.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index a24fa46c5..e69a56d46 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -220,20 +220,6 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { header += "out int gl_ViewportIndex;"; } } - -bool UsesTyplessImage(const Info& info) { - for (const auto& desc : info.image_buffer_descriptors) { - if (desc.format == ImageFormat::Typeless) { - return true; - } - } - for (const auto& desc : info.image_descriptors) { - if (desc.format == ImageFormat::Typeless) { - return true; - } - } - return false; -} } // Anonymous namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -358,7 +344,7 @@ void EmitContext::SetupExtensions(std::string&) { if (info.stores_viewport_mask && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } - if (UsesTyplessImage(info)) { + if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } } From d1a68f7997ce4986e022031e02f6062c370a56a0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 23:11:08 -0400 Subject: [PATCH 658/770] glsl: Add gl_PerVertex in for GS --- src/shader_recompiler/backend/glsl/emit_context.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e69a56d46..fdbe2986c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -245,7 +245,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Geometry: stage_name = "gs"; - header += fmt::format("layout({})in;layout({},max_vertices={})out;", + header += 
fmt::format("layout({})in;layout({},max_vertices={})out;" + "in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; From e7c8f8911f38b29c0725b76db75ce6d6d857c5f9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 00:01:56 -0400 Subject: [PATCH 659/770] glsl: Implement SampleId and SetSampleMask plus some minor refactoring of implementations --- .../backend/glsl/emit_context.cpp | 6 ++++ .../glsl/emit_glsl_context_get_set.cpp | 30 ++++++++++++++++++- .../glsl/emit_glsl_not_implemented.cpp | 29 ------------------ 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index fdbe2986c..484548467 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -256,6 +256,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (runtime_info.force_early_z) { header += "layout(early_fragment_tests)in;"; } + if (info.uses_sample_id) { + header += "in int gl_SampleID;"; + } + if (info.stores_sample_mask) { + header += "out int gl_SampleMask[];"; + } break; case Stage::Compute: stage_name = "cs"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c9a2ceb3d..0546c1c81 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -239,7 +239,6 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); break; default: - fmt::print("Get attribute {}", attr); throw NotImplementedException("Get attribute {}", attr); } } @@ -397,10 +396,39 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_vi ctx.Add("frag_color{}.{}={};", index, swizzle, value); } +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_SampleMask[0]=int({})", value); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + ctx.Add("gl_FragDepth={};", value); +} + void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); } +void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_WorkGroupID;", inst); +} + +void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_InvocationID);", inst); +} + +void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=uint(gl_SampleID);", inst); +} + +void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=gl_HelperInvocation;", inst); +} + +void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { + ctx.uses_y_direction = true; + ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); +} + void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { ctx.AddU32("{}=lmem[{}];", inst, word_offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b292db9d4..f17a07955 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -124,14 +124,6 @@ void 
EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { - NotImplemented(); -} - -void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { - ctx.Add("gl_FragDepth={};", value); -} - void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } @@ -164,27 +156,6 @@ void EmitSetOFlag(EmitContext& ctx) { NotImplemented(); } -void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32x3("{}=gl_WorkGroupID;", inst); -} - -void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uint(gl_InvocationID);", inst); -} - -void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); -} - -void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU1("{}=gl_HelperInvocation;", inst); -} - -void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { - ctx.uses_y_direction = true; - ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); -} - void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { ctx.AddU1("{}=false;", inst); } From 85399e119d6d61375fd9304d69bdfb3a85522d2a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 00:29:19 -0400 Subject: [PATCH 660/770] glsl: Reorganize backend code, remove unneeded [[maybe_unused]] --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/glsl/emit_glsl_control_flow.cpp | 22 +++ .../backend/glsl/emit_glsl_convert.cpp | 66 +++---- .../backend/glsl/emit_glsl_image.cpp | 170 ++++++++---------- .../backend/glsl/emit_glsl_image_atomic.cpp | 0 .../backend/glsl/emit_glsl_instructions.h | 7 - .../backend/glsl/emit_glsl_integer.cpp | 6 +- .../backend/glsl/emit_glsl_memory.cpp | 66 +++---- .../glsl/emit_glsl_not_implemented.cpp | 98 ---------- .../backend/glsl/emit_glsl_shared_memory.cpp | 37 ++-- .../backend/glsl/emit_glsl_special.cpp | 61 +++++++ .../backend/glsl/emit_glsl_undefined.cpp | 32 ++++ 12 files changed, 251 insertions(+), 315 deletions(-) delete mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index e89a0ee16..06ee50fff 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -37,7 +37,6 @@ add_library(shader_recompiler STATIC backend/glsl/emit_glsl_convert.cpp backend/glsl/emit_glsl_floating_point.cpp backend/glsl/emit_glsl_image.cpp - backend/glsl/emit_glsl_image_atomic.cpp backend/glsl/emit_glsl_instructions.h backend/glsl/emit_glsl_integer.cpp backend/glsl/emit_glsl_logical.cpp diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp index e69de29bb..59522fdbd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
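A note on the new file that follows: core GLSL has no demote-to-helper-invocation operation, so the backend lowers the IR's demote to a plain discard, and a Join is expected to be resolved by the structured control-flow pass before code emission ever sees it. A minimal GLSL sketch of the resulting fragment behavior (a hypothetical shader with invented names uv, tex0 and frag_color0; not code produced by this patch):

#version 450
layout(location = 0) in vec2 uv;             // assumed varying
layout(location = 0) out vec4 frag_color0;
layout(binding = 0) uniform sampler2D tex0;  // assumed sampler

void main() {
    vec4 texel = texture(tex0, uv);
    if (texel.a < 0.5) {
        // DemoteToHelperInvocation becomes a plain discard. This is an
        // approximation: a true demote keeps the invocation alive as a
        // helper for derivative computation, which discard does not
        // guarantee.
        discard;
    }
    frag_color0 = texel;
}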
+ +#include <string_view> + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Backend::GLSL { + +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx, + [[maybe_unused]] std::string_view continue_label) { + ctx.Add("discard;"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 9ed5bb319..8d823e466 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -14,8 +14,7 @@ void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertS16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddS32("{}=int(float({}))&0xffff;", inst, value); } @@ -29,13 +28,11 @@ void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertS32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddS32("{}=int(float({}));", inst, value); } -void EmitConvertS32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddS32("{}=int(double({}));", inst, value); } @@ -44,13 +41,11 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertS64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddS64("{}=int64_t(double(float({})));", inst, value); } -void EmitConvertS64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddS64("{}=int64_t(double({}));", inst, value); } @@ -74,13 +69,11 @@ void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertU32F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=uint(float({}));", inst, value); } -void EmitConvertU32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=uint(double({}));", inst, value); } @@ -89,23 +82,19 @@ void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertU64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddU64("{}=uint64_t(float({}));", inst, value); } -void EmitConvertU64F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU64("{}=uint64_t(double({}));", inst, value); } -void EmitConvertU64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU64("{}=uint64_t(uint({}));", inst, value); } -void EmitConvertU32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=uint(uint64_t({}));", inst, value); } @@ -119,13 +108,11 @@ void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertF32F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(double({}));", inst, value); } -void EmitConvertF64F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=double(float({}));", inst, value); } @@ -179,13 +166,11 @@ void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertF32S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(int({}));", inst, value); } -void EmitConvertF32S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(double(int64_t({})));", inst, value); } @@ -194,18 +179,15 @@ void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In NotImplemented(); } -void EmitConvertF32U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(uint({}&0xffff));", inst, value); } -void EmitConvertF32U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(uint({}));", inst, value); } -void EmitConvertF32U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF32("{}=float(double(uint64_t({})));", inst, value); } @@ -219,13 +201,11 @@ void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertF64S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=double(int({}));", 
inst, value); } -void EmitConvertF64S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=double(int64_t({}));", inst, value); } @@ -239,13 +219,11 @@ void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I NotImplemented(); } -void EmitConvertF64U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=double(uint({}));", inst, value); } -void EmitConvertF64U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddF64("{}=double(uint64_t({}));", inst, value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e3a69e3a5..00fe288e2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -12,8 +12,7 @@ namespace Shader::Backend::GLSL { namespace { -std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, - [[maybe_unused]] const IR::Value& index) { +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info) { if (info.type == TextureType::Buffer) { return fmt::format("tex{}", ctx.texture_buffer_bindings.at(info.descriptor_index)); } else { @@ -21,8 +20,7 @@ std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, } } -std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, - [[maybe_unused]] const IR::Value& index) { +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info) { if (info.type == TextureType::Buffer) { return fmt::format("img{}", ctx.image_buffer_bindings.at(info.descriptor_index)); } else { @@ -139,16 +137,14 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } } // Anonymous namespace -void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view bias_lc, - [[maybe_unused]] const IR::Value& offset) { +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto bias{info.has_bias ? 
fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; @@ -179,11 +175,9 @@ void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse } } -void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view lod_lc, - [[maybe_unused]] const IR::Value& offset) { +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view lod_lc, const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_bias) { throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); @@ -191,7 +185,7 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -214,13 +208,10 @@ void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unuse } } -void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view dref, - [[maybe_unused]] std::string_view bias_lc, - [[maybe_unused]] const IR::Value& offset) { + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { @@ -232,7 +223,7 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const auto cast{needs_shadow_ext ? 
"vec4" : "vec3"}; @@ -272,13 +263,10 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, } } -void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] IR::Inst& inst, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view dref, - [[maybe_unused]] std::string_view lod_lc, - [[maybe_unused]] const IR::Value& offset) { + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { @@ -290,7 +278,7 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; @@ -325,13 +313,10 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, } } -void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] const IR::Value& offset2) { +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -370,14 +355,11 @@ void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Ins info.gather_component); } -void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] const IR::Value& offset2, - [[maybe_unused]] std::string_view dref) { +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -413,10 +395,8 @@ void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR: *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); } -void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, [[maybe_unused]] std::string_view ms) { const auto 
info{inst.Flags()}; if (info.has_bias) { @@ -425,7 +405,7 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst if (info.has_lod_clamp) { throw NotImplementedException("EmitImageFetch Lod clamp samples"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; if (!sparse_inst) { @@ -453,11 +433,10 @@ void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst } } -void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view lod) { +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view lod) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; switch (info.type) { case TextureType::Color1D: return ctx.AddU32x4( @@ -481,20 +460,16 @@ void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused] throw LogicError("Unspecified image type {}", info.type.Value()); } -void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords) { +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); } -void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] const IR::Value& derivatives, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] const IR::Value& lod_clamp) { +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords, const IR::Value& derivatives, + const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { throw NotImplementedException("EmitImageGradient Lod clamp samples"); @@ -506,7 +481,7 @@ void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I if (!offset.IsEmpty()) { throw NotImplementedException("EmitImageGradient offset"); } - const auto texture{Texture(ctx, info, index)}; + const auto texture{Texture(ctx, info)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; @@ -519,63 +494,65 @@ void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } } -void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords) { +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { throw NotImplementedException("EmitImageRead Sparse"); } - const auto image{Image(ctx, info, index)}; + const auto 
image{Image(ctx, info)}; ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); } -void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, - [[maybe_unused]] std::string_view coords, - [[maybe_unused]] std::string_view color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, + std::string_view coords, std::string_view color) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } @@ -590,34 +567,35 @@ void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, 
[[maybe_unused]] const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view value) { +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, std::string_view coords, + std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info, index)}; + const auto image{Image(ctx, info)}; ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 12094139f..1257575c0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -30,14 +30,7 @@ void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitReference(EmitContext& ctx, const IR::Value& value); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); -void EmitBranch(EmitContext& ctx, std::string_view label); -void EmitBranchConditional(EmitContext& ctx, std::string_view condition, - std::string_view true_label, std::string_view false_label); -void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); -void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); -void EmitReturn(EmitContext& ctx); void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 1c7413cd4..6e04c6cb2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -156,13 +156,11 @@ void EmitBitwiseNot32(EmitContext& ctx, 
IR::Inst& inst, std::string_view value) ctx.AddU32("{}=~{};", inst, value); } -void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=findMSB(int({}));", inst, value); } -void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view value) { +void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=findMSB(uint({}));", inst, value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 500e5c290..af3dadf71 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -21,19 +21,19 @@ void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view o } } // Anonymous namespace -void EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) { +void EmitLoadGlobalU8(EmitContext&) { NotImplemented(); } -void EmitLoadGlobalS8([[maybe_unused]] EmitContext& ctx) { +void EmitLoadGlobalS8(EmitContext&) { NotImplemented(); } -void EmitLoadGlobalU16([[maybe_unused]] EmitContext& ctx) { +void EmitLoadGlobalU16(EmitContext&) { NotImplemented(); } -void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) { +void EmitLoadGlobalS16(EmitContext&) { NotImplemented(); } @@ -49,19 +49,19 @@ void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view addres ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); } -void EmitWriteGlobalU8([[maybe_unused]] EmitContext& ctx) { +void EmitWriteGlobalU8(EmitContext&) { NotImplemented(); } -void EmitWriteGlobalS8([[maybe_unused]] EmitContext& ctx) { +void EmitWriteGlobalS8(EmitContext&) { NotImplemented(); } -void EmitWriteGlobalU16([[maybe_unused]] EmitContext& ctx) { +void EmitWriteGlobalU16(EmitContext&) { NotImplemented(); } -void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) { +void EmitWriteGlobalS16(EmitContext&) { NotImplemented(); } @@ -77,33 +77,29 @@ void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_ ctx.Add("WriteGlobal128({},{});", address, value); } -void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } -void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } -void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { const auto 
offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } -void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); @@ -132,37 +128,29 @@ void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi binding.U32(), offset_var); } -void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); } -void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); } -void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); } -void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); @@ -181,10 +169,8 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); } -void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { const auto offset_var{ctx.var_alloc.Consume(offset)}; ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index f17a07955..f420fe388 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -14,84 +14,6 @@ namespace Shader::Backend::GLSL { -void EmitPhi(EmitContext& ctx, IR::Inst& phi) { - const size_t num_args{phi.NumArgs()}; - for (size_t i = 0; i < num_args; ++i) { - ctx.var_alloc.Consume(phi.Arg(i)); - } - if (!phi.Definition().is_valid) { - // The phi node wasn't forward defined - ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); - } -} - -void EmitVoid(EmitContext& ctx) {} - -void EmitReference(EmitContext& ctx, const IR::Value& value) { - ctx.var_alloc.Consume(value); -} - -void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { - IR::Inst& phi{*phi_value.InstRecursive()}; - const auto phi_type{phi.Arg(0).Type()}; - if (!phi.Definition().is_valid) { - // The phi node wasn't forward defined - ctx.var_alloc.PhiDefine(phi, phi_type); - } - const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; - const auto val_reg{ctx.var_alloc.Consume(value)}; - if (phi_reg == val_reg) { - return; - } - ctx.Add("{}={};", phi_reg, val_reg); -} - -void EmitBranch(EmitContext& ctx, std::string_view label) { - NotImplemented(); -} - -void EmitBranchConditional(EmitContext& ctx, std::string_view condition, - std::string_view true_label, std::string_view false_label) { - NotImplemented(); -} - -void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, - std::string_view continue_label) { - NotImplemented(); -} - -void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { - NotImplemented(); -} - -void EmitReturn(EmitContext& ctx) { - NotImplemented(); -} - -void EmitJoin(EmitContext& ctx) { - NotImplemented(); -} - -void EmitUnreachable(EmitContext& ctx) { - NotImplemented(); -} - -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { - ctx.Add("discard;"); -} - -void EmitPrologue(EmitContext& ctx) {} - -void EmitEpilogue(EmitContext& ctx) {} - -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); -} - -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { - ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); -} - void EmitGetRegister(EmitContext& ctx) { NotImplemented(); } @@ -156,26 +78,6 @@ void EmitSetOFlag(EmitContext& ctx) { NotImplemented(); } -void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU1("{}=false;", inst); -} - -void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); -} - -void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); -} - -void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=0u;", inst); -} - -void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { - NotImplemented(); -} - void EmitGetZeroFromOp(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 578bc349f..5b6175903 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -9,68 +9,55 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { -void EmitLoadSharedU8([[maybe_unused]] 
EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } -void EmitLoadSharedS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); } -void EmitLoadSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); } -void EmitLoadSharedS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); } -void EmitLoadSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=smem[{}>>2];", inst, offset); } -void EmitLoadSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); } -void EmitLoadSharedU128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view offset) { +void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, offset, offset, offset, offset); } -void EmitWriteSharedU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, offset); } -void EmitWriteSharedU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, value, offset); } -void EmitWriteSharedU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { ctx.Add("smem[{}>>2]={};", offset, value); } -void EmitWriteSharedU64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { ctx.Add("smem[{}>>2]={}.x;", offset, value); ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); } -void EmitWriteSharedU128([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] 
std::string_view offset, - [[maybe_unused]] std::string_view value) { +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { ctx.Add("smem[{}>>2]={}.x;", offset, value); ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index e69de29bb..2155b8802 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { + +void EmitPhi(EmitContext& ctx, IR::Inst& phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t i = 0; i < num_args; ++i) { + ctx.var_alloc.Consume(phi.Arg(i)); + } + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); + } +} + +void EmitVoid(EmitContext&) {} + +void EmitReference(EmitContext& ctx, const IR::Value& value) { + ctx.var_alloc.Consume(value); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { + IR::Inst& phi{*phi_value.InstRecursive()}; + const auto phi_type{phi.Arg(0).Type()}; + if (!phi.Definition().is_valid) { + // The phi node wasn't forward defined + ctx.var_alloc.PhiDefine(phi, phi_type); + } + const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; + const auto val_reg{ctx.var_alloc.Consume(value)}; + if (phi_reg == val_reg) { + return; + } + ctx.Add("{}={};", phi_reg, val_reg); +} + +void EmitPrologue(EmitContext&) { + // TODO +} + +void EmitEpilogue(EmitContext&) { + // TODO +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp index e69de29bb..15bf02dd6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp @@ -0,0 +1,32 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
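Before the body of this new file: GLSL has no notion of an undefined SSA value, so each UndefU* opcode is materialized as a constant of the appropriate width, with the 8- and 16-bit cases widened to uint. A small standalone GLSL illustration of what a consumer of these undefs effectively reads (variable names invented for the example):

#version 450
layout(location = 0) out vec4 color;

void main() {
    bool undef_u1 = false; // what EmitUndefU1 defines
    uint undef_u8 = 0u;    // EmitUndefU8, widened to a 32-bit uint
    uint undef_u32 = 0u;   // EmitUndefU32
    // Every use of an undef therefore observes a well-defined zero.
    color = vec4(undef_u1 ? 1.0 : 0.0, float(undef_u8 + undef_u32), 0.0, 1.0);
}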
+ +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" + +namespace Shader::Backend::GLSL { + +void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU1("{}=false;", inst); +} + +void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32("{}=0u;", inst); +} + +void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU64("{}=0u;", inst); +} + +} // namespace Shader::Backend::GLSL From 4759db28d0b98c79f9a630b63ba13c4cd0df9109 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 00:33:33 -0400 Subject: [PATCH 661/770] glsl: Address Rodrigo's feedback --- .../backend/glsl/emit_context.cpp | 31 +++++++-------- .../backend/glsl/emit_context.h | 2 +- .../backend/glsl/emit_glsl.cpp | 9 +++-- .../glsl/emit_glsl_bitwise_conversion.cpp | 2 +- .../backend/glsl/emit_glsl_composite.cpp | 9 ++++- .../glsl/emit_glsl_context_get_set.cpp | 31 +++++++++------ .../backend/glsl/emit_glsl_control_flow.cpp | 3 +- .../backend/glsl/emit_glsl_convert.cpp | 38 +++++++++---------- .../backend/glsl/emit_glsl_floating_point.cpp | 19 +++++----- .../backend/glsl/emit_glsl_instructions.h | 2 +- .../backend/glsl/emit_glsl_select.cpp | 4 +- .../backend/glsl/emit_glsl_warp.cpp | 10 ++--- .../backend/glsl/var_alloc.cpp | 2 +- 13 files changed, 87 insertions(+), 75 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 484548467..cbcf0a1eb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -13,7 +13,7 @@ u32 CbufIndex(size_t offset) { return (offset / 4) % 4; } -char CbufSwizzle(size_t offset) { +char Swizzle(size_t offset) { return "xyzw"[CbufIndex(offset)]; } @@ -341,8 +341,8 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if (info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry && - stage != Stage::Geometry) { + if ((info.stores_viewport_index || info.stores_layer) && + profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency) { @@ -428,16 +428,16 @@ void EmitContext::DefineHelperFunctions() { header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; } if (info.uses_global_decrement || info.uses_shared_decrement) { - header += "uint CasDecrement(uint op_a,uint " - "op_b){return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; + header += "uint CasDecrement(uint op_a,uint op_b){" + "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; } if (info.uses_atomic_f32_add) { - header += "uint CasFloatAdd(uint op_a,float op_b){return " - "ftou(utof(op_a)+op_b);}"; + header += "uint CasFloatAdd(uint op_a,float op_b){" + "return ftou(utof(op_a)+op_b);}"; } if (info.uses_atomic_f32x2_add) { - header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " - "packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; + header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" + "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; } if (info.uses_atomic_f32x2_min) { header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " @@ -476,9 
+476,10 @@ void EmitContext::DefineHelperFunctions() { "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; if (info.loads_position) { - func += fmt::format("case {}:", static_cast(IR::Attribute::PositionX) >> 2); const auto position_idx{is_array ? "gl_in[vertex]." : ""}; - func += fmt::format("return {}{}[masked_index];", position_idx, position_name); + func += fmt::format("case {}:return {}{}[masked_index];", + static_cast(IR::Attribute::PositionX) >> 2, position_idx, + position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; for (u32 i = 0; i < info.input_generics.size(); ++i) { @@ -486,8 +487,8 @@ void EmitContext::DefineHelperFunctions() { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; - func += fmt::format("case {}:", base_attribute_value + i); - func += fmt::format("return in_attr{}{}[masked_index];", i, vertex_idx); + func += fmt::format("case {}:return in_attr{}{}[masked_index];", + base_attribute_value + i, i, vertex_idx); } func += "default: return 0.0;}}"; header += func; @@ -508,8 +509,8 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { for (size_t i = 0; i < addr_xy.size(); ++i) { const auto addr_loc{ssbo.cbuf_offset + 4 * i}; const auto size_loc{size_cbuf_offset + 4 * i}; - addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, CbufSwizzle(addr_loc)); - size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, CbufSwizzle(size_loc)); + addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); + size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); } const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 2b0d22ce5..0d18abe90 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -46,7 +46,7 @@ public: const auto var_def{var_alloc.AddDefine(inst, type)}; if (var_def.empty()) { // skip assigment. 
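The pointer arithmetic in the next hunk is sound because the typed Add helpers' format strings begin with the three characters {}=, the placeholder for the defined variable; when var_def is empty there is nothing to assign, so skipping three characters prints only the right-hand side. &format_str[3] and format_str + 3 name the same address, the latter simply reads more directly. A GLSL-level view of the two emission modes (operand names invented; ftou is modeled as a function here, assuming it wraps floatBitsToUint as the backend's helpers suggest):

#version 450
// Stand-in for the backend-provided ftou helper.
uint ftou(float f) { return floatBitsToUint(f); }

layout(location = 0) out vec4 color;

void main() {
    float in_val = 1.0;
    // Result used: the full format string "{}=ftou({});" is emitted.
    uint x1 = ftou(in_val);
    // Result unused: format_str + 3 drops the leading "{}=", leaving a
    // bare expression statement.
    ftou(in_val);
    color = vec4(float(x1 != 0u));
}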
- code += fmt::format(&format_str[3], std::forward(args)...); + code += fmt::format(format_str + 3, std::forward(args)...); } else { code += fmt::format(format_str, var_def, std::forward(args)...); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index b189f6c11..3e6add7cd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -5,6 +5,7 @@ #include #include +#include "common/alignment.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" @@ -159,8 +160,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.var_alloc.Consume(node.data.repeat.cond)); break; default: - throw NotImplementedException("AbstractSyntaxNode::Type {}", node.type); - break; + throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); } } } @@ -209,10 +209,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.local_memory_size > 0) { - ctx.header += fmt::format("uint lmem[{}];", program.local_memory_size / 4); + ctx.header += fmt::format("uint lmem[{}];", Common::AlignUp(program.local_memory_size, 4)); } if (program.shared_memory_size > 0) { - ctx.header += fmt::format("shared uint smem[{}];", program.shared_memory_size / 4); + ctx.header += + fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); } ctx.header += "\nvoid main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index eff672cc4..3c1714e89 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -19,7 +19,7 @@ void Alias(IR::Inst& inst, const IR::Value& value) { value_inst.DestructiveRemoveUsage(); inst.SetDefinition(value_inst.Definition()); } -} // namespace +} // Anonymous namespace void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { Alias(inst, value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 954fc67b1..7421ce97d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -13,8 +13,13 @@ namespace { constexpr std::string_view SWIZZLE{"xyzw"}; void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, std::string_view object, u32 index) { - ctx.Add("{}={};", result, composite); - ctx.Add("{}.{}={};", result, SWIZZLE[index], object); + if (result == composite) { + // The result is aliased with the composite + ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); + } else { + ctx.Add("{}={};", result, composite); + ctx.Add("{}.{}={};", result, SWIZZLE[index], object); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0546c1c81..711b568b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -31,12 +31,7 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { } std::string OutputVertexIndex(EmitContext& ctx) { - switch (ctx.stage) { - case Stage::TessellationControl: - return "[gl_InvocationID]"; - default: - return ""; - } + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; } } // Anonymous namespace @@ -219,7 +214,11 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, case IR::Attribute::ColorFrontDiffuseG: case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: - ctx.AddF32("{}=gl_FrontMaterial.diffuse.{};", inst, swizzle); + if (ctx.stage == Stage::Fragment) { + ctx.AddF32("{}=gl_Color.{};", inst, swizzle); + } else { + ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); + } break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: @@ -300,28 +299,36 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::ColorFrontDiffuseG: case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: - ctx.Add("gl_FrontMaterial.diffuse.{}={};", swizzle, value); + if (ctx.stage == Stage::Fragment) { + ctx.Add("gl_Color.{}={};", swizzle, value); + } else { + ctx.Add("gl_FrontColor.{}={};", swizzle, value); + } break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: - ctx.Add("gl_FrontMaterial.specular.{}={};", swizzle, value); + if (ctx.stage == Stage::Fragment) { + ctx.Add("gl_SecondaryColor.{}={};", swizzle, value); + } else { + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); + } break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: case IR::Attribute::ColorBackDiffuseB: case IR::Attribute::ColorBackDiffuseA: - ctx.Add("gl_BackMaterial.diffuse.{}={};", swizzle, value); + ctx.Add("gl_BackColor.{}={};", swizzle, value); break; case IR::Attribute::ColorBackSpecularR: case IR::Attribute::ColorBackSpecularG: case IR::Attribute::ColorBackSpecularB: case IR::Attribute::ColorBackSpecularA: - ctx.Add("gl_BackMaterial.specular.{}={};", swizzle, value); + ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::FogCoordinate: - ctx.Add("gl_FragCoord.x={};", value); + ctx.Add("gl_FogFragCoord.x={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp index 59522fdbd..53f8896be 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp @@ -14,8 +14,7 @@ void EmitJoin(EmitContext&) { throw NotImplementedException("Join shouldn't be emitted"); } -void EmitDemoteToHelperInvocation(EmitContext& ctx, - [[maybe_unused]] std::string_view continue_label) { +void EmitDemoteToHelperInvocation(EmitContext& ctx) { ctx.Add("discard;"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 8d823e466..777e290b4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -15,7 +15,7 @@ void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void 
EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=int(float({}))&0xffff;", inst, value); + ctx.AddS32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); } void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -29,11 +29,11 @@ void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=int(float({}));", inst, value); + ctx.AddS32("{}=int({});", inst, value); } void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=int(double({}));", inst, value); + ctx.AddS32("{}=int({});", inst, value); } void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -42,11 +42,11 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS64("{}=int64_t(double(float({})));", inst, value); + ctx.AddS64("{}=int64_t(double({}));", inst, value); } void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS64("{}=int64_t(double({}));", inst, value); + ctx.AddS64("{}=int64_t({});", inst, value); } void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -70,11 +70,11 @@ void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=uint(float({}));", inst, value); + ctx.AddU32("{}=uint({});", inst, value); } void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=uint(double({}));", inst, value); + ctx.AddU32("{}=uint({});", inst, value); } void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -83,19 +83,19 @@ void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=uint64_t(float({}));", inst, value); + ctx.AddU64("{}=uint64_t({});", inst, value); } void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=uint64_t(double({}));", inst, value); + ctx.AddU64("{}=uint64_t({});", inst, value); } void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=uint64_t(uint({}));", inst, value); + ctx.AddU64("{}=uint64_t({});", inst, value); } void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=uint(uint64_t({}));", inst, value); + ctx.AddU32("{}=uint({});", inst, value); } void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -109,11 +109,11 @@ void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=float(double({}));", inst, value); + ctx.AddF32("{}=float({});", inst, value); } void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF64("{}=double(float({}));", inst, value); + ctx.AddF64("{}=double({});", inst, value); } void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -171,7 +171,7 @@ void EmitConvertF32S32(EmitContext& ctx, 
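The conversion cleanups in this file all drop casts that cannot change the result: a GLSL constructor such as uint(x) already performs the numeric conversion, so uint(float(x)) on a float operand is a no-op wrapper, and a widening hop through double before a 64-bit truncation yields the same value as converting directly. The widening-hop equivalence can be checked on the CPU side:

#include <cassert>
#include <cstdint>

// double represents every finite float exactly, so inserting a float->double
// step before the integer truncation cannot alter the result.
int main() {
    const float f = 123456.75f;
    assert(static_cast<std::int64_t>(f) ==
           static_cast<std::int64_t>(static_cast<double>(f)));
}
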
IR::Inst& inst, std::string_view value) } void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=float(double(int64_t({})));", inst, value); + ctx.AddF32("{}=float(int64_t({}));", inst, value); } void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -180,15 +180,15 @@ void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::In } void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=float(uint({}&0xffff));", inst, value); + ctx.AddF32("{}=float({}&0xffff);", inst, value); } void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=float(uint({}));", inst, value); + ctx.AddF32("{}=float({});", inst, value); } void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=float(double(uint64_t({})));", inst, value); + ctx.AddF32("{}=float({});", inst, value); } void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -220,11 +220,11 @@ void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF64("{}=double(uint({}));", inst, value); + ctx.AddF64("{}=double({});", inst, value); } void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF64("{}=double(uint64_t({}));", inst, value); + ctx.AddF64("{}=double({});", inst, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index fbf66015f..b11be5bd7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -15,14 +15,13 @@ void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string std::string_view op, bool ordered) { ctx.AddU1("{}={}{}{}", inst, lhs, op, rhs, lhs, rhs); if (ordered) { - ctx.code += fmt::format("&&!isnan({})&&!isnan({})", lhs, rhs); + ctx.Add("&&!isnan({})&&!isnan({});", lhs, rhs); } else { - ctx.code += fmt::format("||isnan({})||isnan({})", lhs, rhs); + ctx.Add("||isnan({})||isnan({});", lhs, rhs); } - ctx.code += ";"; } -bool Precise(IR::Inst& inst) { +bool IsPrecise(const IR::Inst& inst) { return {inst.Flags().no_contraction}; } } // Anonymous namespace @@ -46,7 +45,7 @@ void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF32("{}={}+{};", inst, a, b); } else { ctx.AddF32("{}={}+{};", inst, a, b); @@ -54,7 +53,7 @@ void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri } void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF64("{}={}+{};", inst, a, b); } else { ctx.AddF64("{}={}+{};", inst, a, b); @@ -69,7 +68,7 @@ void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); } else { ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); @@ -78,7 
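The Compare fix is worth dwelling on: unlike C and C++, GLSL does not guarantee IEEE behaviour for comparisons involving NaN, so the emitter spells out the ordered and unordered variants with explicit isnan checks, and it now terminates the statement through ctx.Add instead of poking ctx.code directly. A CPU-side model of the two predicates, with IEEE semantics for reference:

#include <cassert>
#include <cmath>

// Ordered: true only when neither operand is NaN and the relation holds.
// Unordered: true when either operand is NaN or the relation holds.
bool OrderedLess(float a, float b) {
    return a < b && !std::isnan(a) && !std::isnan(b);
}
bool UnorderedLess(float a, float b) {
    return a < b || std::isnan(a) || std::isnan(b);
}

int main() {
    const float nan = std::nanf("");
    assert(!OrderedLess(nan, 1.0f));
    assert(UnorderedLess(nan, 1.0f));
}
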
+77,7 @@ void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, std::string_view c) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); } else { ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); @@ -107,7 +106,7 @@ void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF32("{}={}*{};", inst, a, b); } else { ctx.AddF32("{}={}*{};", inst, a, b); @@ -115,7 +114,7 @@ void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::stri } void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - if (Precise(inst)) { + if (IsPrecise(inst)) { ctx.AddPrecF64("{}={}*{};", inst, a, b); } else { ctx.AddF64("{}={}*{};", inst, a, b); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 1257575c0..0c717664f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -31,7 +31,7 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); void EmitReference(EmitContext& ctx, const IR::Value& value); void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); void EmitJoin(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 49fba9073..7aa6096e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,12 +28,12 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); + ctx.AddU32("{}={}?uint({}):uint({});", inst, cond, true_value, false_value); } void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); + ctx.AddU64("{}={}?uint64_t({}):uint64_t({});", inst, cond, true_value, false_value); } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 6ced0776c..7047928fd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -89,23 +89,23 @@ void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) } void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uvec2(gl_SubGroupEqMaskARB).x;", inst); + ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); } void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { - 
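The IsPrecise rename also clarifies what the no_contraction flag buys: a precise-qualified destination forbids the GLSL compiler from fusing a*b+c into an FMA, which matters because the fused form rounds once instead of twice. The difference is observable on the CPU too; compile this with contraction disabled (for example -ffp-contract=off), which is exactly the constraint 'precise' imposes on the shader compiler:

#include <cmath>
#include <cstdio>

int main() {
    const double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27, c = -1.0;
    std::printf("separate: %a\n", a * b + c);         // product rounds to 1.0 first
    std::printf("fused:    %a\n", std::fma(a, b, c)); // exact product, one rounding
}

With contraction off the first line prints 0x0p+0 while the fused line prints -0x1p-54; a compiler free to contract could make both agree, which is precisely the nondeterminism no_contraction rules out.
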
ctx.AddU32("{}=uvec2(gl_SubGroupLtMaskARB).x;", inst); + ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); } void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uvec2(gl_SubGroupLeMaskARB).x;", inst); + ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); } void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uvec2(gl_SubGroupGtMaskARB).x;", inst); + ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); } void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { - ctx.AddU32("{}=uvec2(gl_SubGroupGeMaskARB).x;", inst); + ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); } void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 95e8233e2..6a19aa549 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -116,7 +116,7 @@ std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { id.type.Assign(type); GetUseTracker(type).uses_temp = true; inst.SetDefinition(id); - return "t" + Representation(inst.Definition()); + return 't' + Representation(inst.Definition()); } } From 7d89a82a4891f78e2c068a24ad3bb56d74c92055 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 00:41:28 -0400 Subject: [PATCH 662/770] glsl: Remove Signed Integer variables --- .../backend/glsl/emit_context.h | 10 ---------- .../backend/glsl/emit_glsl_atomic.cpp | 4 ++-- .../backend/glsl/emit_glsl_convert.cpp | 10 +++++----- .../backend/glsl/emit_glsl_integer.cpp | 2 +- .../backend/glsl/emit_glsl_memory.cpp | 4 ++-- .../backend/glsl/emit_glsl_shared_memory.cpp | 4 ++-- src/shader_recompiler/backend/glsl/var_alloc.cpp | 16 ---------------- src/shader_recompiler/backend/glsl/var_alloc.h | 6 +----- 8 files changed, 13 insertions(+), 43 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 0d18abe90..9d8be0c9a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -69,21 +69,11 @@ public: Add(format_str, inst, args...); } - template - void AddS32(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); - } - template void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { Add(format_str, inst, args...); } - template - void AddS64(const char* format_str, IR::Inst& inst, Args&&... args) { - Add(format_str, inst, args...); - } - template void AddU64(const char* format_str, IR::Inst& inst, Args&&... 
args) { Add(format_str, inst, args...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 7353d5d4e..850eee1e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -183,7 +183,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int i=0;i<2;++i){{ " @@ -208,7 +208,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { // LOG_WARNING(..., "Op falling to non-atomic"); - ctx.AddS64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, + ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); ctx.Add("for(int i=0;i<2;++i){{ " diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index 777e290b4..c9f173e2f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -15,7 +15,7 @@ void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); + ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); } void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -29,11 +29,11 @@ void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=int({});", inst, value); + ctx.AddU32("{}=int({});", inst, value); } void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS32("{}=int({});", inst, value); + ctx.AddU32("{}=int({});", inst, value); } void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, @@ -42,11 +42,11 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS64("{}=int64_t(double({}));", inst, value); + ctx.AddU64("{}=int64_t(double({}));", inst, value); } void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddS64("{}=int64_t({});", inst, value); + ctx.AddU64("{}=int64_t({});", inst, value); } void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 
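The AddS64 -> AddU64 switch rides on the fallback shape these atomics already used: absent true 64-bit atomics, the emitter reads the two SSBO words, fuses them with packInt2x32 so min/max compare the full signed value, and writes the result back word by word, which is why the commented-out warning calls the operation non-atomic. A CPU model of the pack step, assuming packInt2x32 places .x in the low word as GLSL specifies:

#include <cassert>
#include <cstdint>

// Fuse two 32-bit words into one signed 64-bit value, low word first.
std::int64_t PackInt2x32(std::uint32_t lo, std::uint32_t hi) {
    const std::uint64_t bits = (static_cast<std::uint64_t>(hi) << 32) | lo;
    return static_cast<std::int64_t>(bits);  // two's-complement reinterpretation
}

int main() {
    // 0xFFFFFFFF'FFFFFFFE is -2: a signed compare must treat it as small.
    assert(PackInt2x32(0xFFFFFFFEu, 0xFFFFFFFFu) == -2);
}
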
6e04c6cb2..7a2f79d10 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -103,7 +103,7 @@ void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view shift) { - ctx.AddS32("{}=int({})>>{};", inst, base, shift); + ctx.AddU32("{}=int({})>>{};", inst, base, shift); } void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index af3dadf71..daef5fb84 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -87,7 +87,7 @@ void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindin void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } @@ -101,7 +101,7 @@ void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.var_alloc.Consume(offset)}; - ctx.AddS32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, + ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, ctx.stage_name, binding.U32(), offset_var, offset_var); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 5b6175903..7abc6575f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -14,7 +14,7 @@ void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) } void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { - ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); } void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { @@ -22,7 +22,7 @@ void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset } void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { - ctx.AddS32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); + ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); } void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index 6a19aa549..cbf56491c 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -21,12 +21,8 @@ std::string TypePrefix(GlslVarType type) { return "f16x2_"; case GlslVarType::U32: return "u_"; - case GlslVarType::S32: - return "s_"; case GlslVarType::F32: return "f_"; - case GlslVarType::S64: - return 
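The S8/S16 loads can live in unsigned temporaries because the sign extension happens inside the expression: the int overload of bitfieldExtract extends the top bit of the extracted field, so the byte or halfword arrives already sign-extended no matter where the result is stored. A CPU model of the signed extract these loads rely on:

#include <cassert>
#include <cstdint>

// GLSL-style signed bitfieldExtract, for bits in [1, 31].
std::int32_t BitfieldExtract(std::int32_t value, int offset, int bits) {
    const std::uint32_t field = (static_cast<std::uint32_t>(value) >> offset) &
                                ((1u << bits) - 1u);
    const std::uint32_t sign = 1u << (bits - 1);
    // Sign-extend by filling the high bits when the field's top bit is set.
    return static_cast<std::int32_t>((field ^ sign) - sign);
}

int main() {
    // Byte 1 of 0x0000FF00 is 0xFF; loaded as S8 it must read back as -1.
    assert(BitfieldExtract(0x0000FF00, 8, 8) == -1);
}
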
"s64_"; case GlslVarType::U64: return "u64_"; case GlslVarType::F64: @@ -213,13 +209,9 @@ std::string VarAlloc::GetGlslType(GlslVarType type) const { return "f16vec2"; case GlslVarType::U32: return "uint"; - case GlslVarType::S32: - return "int"; case GlslVarType::F32: case GlslVarType::PrecF32: return "float"; - case GlslVarType::S64: - return "int64_t"; case GlslVarType::U64: return "uint64_t"; case GlslVarType::F64: @@ -252,12 +244,8 @@ VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { return var_f16x2; case GlslVarType::U32: return var_u32; - case GlslVarType::S32: - return var_s32; case GlslVarType::F32: return var_f32; - case GlslVarType::S64: - return var_s64; case GlslVarType::U64: return var_u64; case GlslVarType::F64: @@ -291,12 +279,8 @@ const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { return var_f16x2; case GlslVarType::U32: return var_u32; - case GlslVarType::S32: - return var_s32; case GlslVarType::F32: return var_f32; - case GlslVarType::S64: - return var_s64; case GlslVarType::U64: return var_u64; case GlslVarType::F64: diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h index ab1d1acbd..8b49f32a6 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.h +++ b/src/shader_recompiler/backend/glsl/var_alloc.h @@ -21,10 +21,8 @@ namespace Shader::Backend::GLSL { enum class GlslVarType : u32 { U1, F16x2, - S32, U32, F32, - S64, U64, F64, U32x2, @@ -42,7 +40,7 @@ struct Id { union { u32 raw; BitField<0, 1, u32> is_valid; - BitField<1, 5, GlslVarType> type; + BitField<1, 4, GlslVarType> type; BitField<6, 26, u32> index; }; @@ -90,7 +88,6 @@ private: UseTracker var_bool{}; UseTracker var_f16x2{}; - UseTracker var_s32{}; UseTracker var_u32{}; UseTracker var_u32x2{}; UseTracker var_u32x3{}; @@ -100,7 +97,6 @@ private: UseTracker var_f32x3{}; UseTracker var_f32x4{}; UseTracker var_u64{}; - UseTracker var_s64{}; UseTracker var_f64{}; UseTracker var_precf32{}; UseTracker var_precf64{}; From e81c73a8748ccfcde56acfee5630116c3950e479 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 02:50:30 -0400 Subject: [PATCH 663/770] glsl: Address more feedback. 
Implement indexed texture reads --- .../backend/glsl/emit_context.cpp | 48 +++---- .../backend/glsl/emit_context.h | 13 +- .../backend/glsl/emit_glsl_image.cpp | 136 ++++++++---------- .../backend/glsl/emit_glsl_select.cpp | 4 +- .../backend/glsl/emit_glsl_shared_memory.cpp | 19 ++- .../renderer_opengl/gl_shader_util.cpp | 6 +- 6 files changed, 112 insertions(+), 114 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index cbcf0a1eb..ed10eca8a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -559,53 +559,45 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { } void EmitContext::SetupImages(Bindings& bindings) { - image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + image_buffers.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { - image_buffer_bindings.push_back(bindings.image); - const auto indices{bindings.image + desc.count}; + image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{};", - bindings.image, format, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, + format, bindings.image, array_decorator); bindings.image += desc.count; } - image_bindings.reserve(info.image_descriptors.size()); + images.reserve(info.image_descriptors.size()); for (const auto& desc : info.image_descriptors) { - image_bindings.push_back(bindings.image); + images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; const auto qualifier{desc.is_written ? "" : "readonly "}; - const auto indices{bindings.image + desc.count}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{})uniform {}{} img{};", bindings.image, format, - qualifier, image_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); bindings.image += desc.count; } } void EmitContext::SetupTextures(Bindings& bindings) { - texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { - texture_buffer_bindings.push_back(bindings.texture); + texture_buffers.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(TextureType::Buffer, false)}; - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? 
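Indexed reads force the bindings to become real GLSL arrays: instead of unrolling count separate declarations per descriptor, the setup now emits a single arrayed uniform named after the base binding, which is what makes a runtime [index] suffix at the use site possible at all. A runnable sketch of the declaration path (binding values invented for the example):

#include <cstdint>
#include <fmt/format.h>

int main() {
    const std::uint32_t binding = 3, count = 4;
    const auto array_decorator = count > 1 ? fmt::format("[{}]", count) : "";
    fmt::print("layout(binding={}) uniform sampler2D tex{}{};\n", binding, binding,
               array_decorator);
    // Prints: layout(binding=3) uniform sampler2D tex3[4];
    // The Texture()/Image() helpers below append "[i]" only when count > 1.
}
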
fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } - texture_bindings.reserve(info.texture_descriptors.size()); + textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; - texture_bindings.push_back(bindings.texture); - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9d8be0c9a..685f56089 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -36,6 +36,11 @@ struct GenericElementInfo { u32 num_components{}; }; +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -142,10 +147,10 @@ public: std::string_view stage_name = "invalid"; std::string_view position_name = "gl_Position"; - std::vector texture_buffer_bindings; - std::vector image_buffer_bindings; - std::vector texture_bindings; - std::vector image_bindings; + std::vector texture_buffers; + std::vector image_buffers; + std::vector textures; + std::vector images; std::array, 32> output_generics{}; bool uses_y_direction{}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 00fe288e2..6a98f7ac2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -12,20 +12,18 @@ namespace Shader::Backend::GLSL { namespace { -std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("tex{}", ctx.texture_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); - } +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); } -std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("img{}", ctx.image_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("img{}", ctx.image_bindings.at(info.descriptor_index)); - } +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? 
ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); } std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { @@ -137,14 +135,14 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } } // Anonymous namespace -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view bias_lc, const IR::Value& offset) { +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; @@ -175,9 +173,9 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view lod_lc, const IR::Value& offset) { +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_bias) { throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); @@ -185,7 +183,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -208,8 +206,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -223,7 +220,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const auto cast{needs_shadow_ext ? 
"vec4" : "vec3"}; @@ -263,8 +260,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view lod_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -278,7 +274,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; @@ -313,10 +309,10 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -355,11 +351,11 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR info.gather_component); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2, std::string_view dref) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -395,7 +391,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] cons *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); } -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, [[maybe_unused]] std::string_view ms) { const auto info{inst.Flags()}; @@ -405,7 +401,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: if (info.has_lod_clamp) { throw NotImplementedException("EmitImageFetch Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; if (!sparse_inst) { @@ -433,10 +429,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: } } -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view lod) { +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + 
const auto texture{Texture(ctx, info, index)}; switch (info.type) { case TextureType::Color1D: return ctx.AddU32x4( @@ -460,14 +456,14 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, throw LogicError("Unspecified image type {}", info.type.Value()); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); } -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& derivatives, const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { const auto info{inst.Flags()}; @@ -481,7 +477,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const if (!offset.IsEmpty()) { throw NotImplementedException("EmitImageGradient offset"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; @@ -494,65 +490,60 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const } } -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { throw NotImplementedException("EmitImageRead Sparse"); } - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); } -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, 
TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } @@ -567,35 +558,34 @@ void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, 
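All of these image atomics funnel through the same GLSL built-in and let overload resolution pick the semantics: imageAtomicMin/Max on an r32ui image take a uint while the r32i overloads take an int, so casting the value selects a signed or unsigned comparison over the identical 32 bits. The distinction is easy to see on the CPU:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t bits = 0xFFFFFFFFu;  // reads as -1 when signed
    assert(std::min(bits, 1u) == 1u);        // unsigned min: 1 is smaller
    assert(std::min(static_cast<std::int32_t>(bits), 1) == -1);  // signed min: -1 wins
}
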
image, TexelFetchCastToInt(coords, info), value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 7aa6096e6..49fba9073 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,12 +28,12 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU32("{}={}?uint({}):uint({});", inst, cond, true_value, false_value); + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU64("{}={}?uint64_t({}):uint64_t({});", inst, cond, true_value, false_value); + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 7abc6575f..8a13bf617 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -9,6 +9,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } @@ -39,13 +50,13 @@ void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offse } void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, - offset); + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); } void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, - value, offset); + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); } void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index ac6f33e34..5109985f1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader, std::optional code = {}) { +static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { 
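The shared-memory change is the most subtle one in this patch: a plain read-modify-write with bitfieldInsert can lose a concurrent store to a neighbouring byte of the same 32-bit word, so sub-word writes now loop on atomicCompSwap until the merge lands on an unchanged word. Expanding the cas_loop template for a byte store at offset o gives roughly this GLSL (reconstructed from the format string above):

// for(;;){
//     uint old_value = smem[o>>2];
//     uint cas_result = atomicCompSwap(smem[o>>2], old_value,
//                                      bitfieldInsert(smem[o>>2], value, int(o%4)*8, 8));
//     if (cas_result == old_value) { break; }
// }

Note that the template re-reads smem for the bitfieldInsert base instead of reusing old_value; the compare against old_value still guards the store, so a retry covers the ordinary race.
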
@@ -28,8 +28,8 @@ static void LogShader(GLuint shader, std::optional code = {}) glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); - if (code.has_value()) { - LOG_INFO(Render_OpenGL, "\n{}", *code); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); } } else { LOG_WARNING(Render_OpenGL, "{}", log); From 84c86e03cda7d8c146d8afdbba3ddf628914819b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 21:49:33 -0400 Subject: [PATCH 664/770] glsl: Refactor GetCbuf functions to reduce code duplication --- .../glsl/emit_glsl_context_get_set.cpp | 178 +++++++----------- 1 file changed, 70 insertions(+), 108 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 711b568b1..5861c4d4c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -33,144 +33,106 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { std::string OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; } + +void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + u32 num_bits, std::string_view cast = {}, bool component_indexing_bug = false, + std::string_view bit_offset = {}) { + const bool is_immediate{offset.IsImmediate()}; + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) + : fmt::format("{}>>4", offset_var)}; + const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) + : fmt::format("[({}>>2)%4]", offset_var)}; + + const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; + const auto extraction{num_bits == 32 ? 
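The GetCbuf refactor below leans on two-stage formatting: the first fmt::format call writes the escaped braces {{}} out as a literal {} placeholder, producing a template that a second call later fills with the swizzle. A self-contained demonstration (fmt::runtime is required for the second pass on fmt 8 and newer; older releases accept plain runtime strings):

#include <fmt/format.h>

int main() {
    // First pass: "{{}}" survives as a literal "{}" placeholder.
    const auto cbuf_cast = fmt::format("{}({}[{}]{{}})", "ftou", "vs_cbuf0", 2);
    // cbuf_cast == "ftou(vs_cbuf0[2]{})"
    fmt::print("{}\n", fmt::format(fmt::runtime(cbuf_cast), ".y"));
    // Prints: ftou(vs_cbuf0[2].y)
}
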
cbuf_cast + : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, + bit_offset, num_bits)}; + if (!component_indexing_bug) { + const auto result{fmt::format(extraction, swizzle)}; + ctx.AddU32("{}={};", inst, result); + return; + } + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; + for (u32 i = 0; i < 4; ++i) { + const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; + const auto result{fmt::format(extraction, swizzle_string)}; + ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); + } +} + +void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; + GetCbuf(ctx, inst, binding, offset, 8, cast, false, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; + GetCbuf(ctx, inst, binding, offset, 8, cast, ctx.profile.has_gl_component_indexing_bug, + bit_offset); + } +} + +void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, + std::string_view cast) { + if (offset.IsImmediate()) { + const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; + GetCbuf(ctx, inst, binding, offset, 16, cast, false, bit_offset); + } else { + const auto offset_var{ctx.var_alloc.Consume(offset)}; + const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; + GetCbuf(ctx, inst, binding, offset, 16, cast, ctx.profile.has_gl_component_indexing_bug, + bit_offset); + } +} } // Anonymous namespace void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, - offset_var, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int(({}%4)*8),8);", - cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); - } + GetCbuf8(ctx, inst, binding, offset, "ftou"); } void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),8);", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32()), (offset.U32() % 4) * 8); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int(({}%4)*8),8);", inst, cbuf, - offset_var, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - 
ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int(({}%4)*8),8);", - cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); - } + GetCbuf8(ctx, inst, binding, offset, "ftoi"); } void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftou({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32("{}=bitfieldExtract(ftou({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, - cbuf, offset_var, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftou({}[{}>>4].{}),int((({}>>1)%2)*16),16);", - cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); - } + GetCbuf16(ctx, inst, binding, offset, "ftou"); } void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}].{}),int({}),16);", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32()), ((offset.U32() / 2) % 2) * 16); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32("{}=bitfieldExtract(ftoi({}[{}>>4][({}>>2)%4]),int((({}>>1)%2)*16),16);", inst, - cbuf, offset_var, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=bitfieldExtract(ftoi({}[{}>>4].{}),int((({}>>1)%2)*16),16);", - cbuf_offset, swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], offset_var); - } + GetCbuf16(ctx, inst, binding, offset, "ftoi"); } void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddU32("{}=ftou({}[{}].{});", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32())); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if (!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddU32("{}=ftou({}[{}>>4][({}>>2)%4]);", inst, cbuf, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=ftou({}[{}>>4].{});", cbuf_offset, swizzle, ret, cbuf, offset_var, - "xyzw"[swizzle]); - } + GetCbuf(ctx, inst, binding, offset, 32, "ftou"); } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; - if (offset.IsImmediate()) { - ctx.AddF32("{}={}[{}].{};", inst, cbuf, offset.U32() / 16, OffsetSwizzle(offset.U32())); - return; - } - const auto offset_var{ctx.var_alloc.Consume(offset)}; - if 
(!ctx.profile.has_gl_component_indexing_bug) { - ctx.AddF32("{}={}[{}>>4][({}>>2)%4];", inst, cbuf, offset_var, offset_var); - return; - } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; - const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}={}[{}>>4].{};", cbuf_offset, swizzle, ret, cbuf, offset_var, - "xyzw"[swizzle]); - } + GetCbuf(ctx, inst, binding, offset, 32); } void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, offset.U32() / 16, - OffsetSwizzle(offset.U32()), cbuf, (offset.U32() + 4) / 16, - OffsetSwizzle(offset.U32() + 4)); + const u32 u32_offset{offset.U32()}; + if (u32_offset % 2 == 0) { + ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); + } else { + ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, + OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, + OffsetSwizzle(u32_offset + 4)); + } return; } const auto offset_var{ctx.var_alloc.Consume(offset)}; From 7c82f20b52e9f4145f9030b8726d02a9f8a740a1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 22:16:34 -0400 Subject: [PATCH 665/770] glsl: Add immediate index oob checking for Cbuf getters --- .../backend/glsl/emit_glsl_context_get_set.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 5861c4d4c..8223ad862 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -38,6 +38,15 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const I u32 num_bits, std::string_view cast = {}, bool component_indexing_bug = false, std::string_view bit_offset = {}) { const bool is_immediate{offset.IsImmediate()}; + if (is_immediate) { + const s32 signed_offset{static_cast(offset.U32())}; + static constexpr u32 cbuf_size{4096 * 16}; + if (signed_offset < 0 || offset.U32() > cbuf_size) { + // LOG_WARNING(..., "Immediate constant buffer offset is out of bounds"); + ctx.AddU32("{}=0u;", inst); + return; + } + } const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto index{is_immediate ? 
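The new guard rejects immediate offsets past the hardware limit before any GLSL is emitted: Maxwell constant buffers top out at 64 KiB (4096 vec4s of 16 bytes each, the 4096 * 16 above), and an immediate read beyond that is answered with a constant 0u instead of an out-of-bounds array access. The test itself, mirroring the diff:

#include <cstdint>

// True when an immediate constant-buffer offset may be emitted directly.
bool ImmediateOffsetInBounds(std::uint32_t offset) {
    constexpr std::uint32_t cbuf_size = 4096 * 16;  // 64 KiB per constant buffer
    return static_cast<std::int32_t>(offset) >= 0 && offset <= cbuf_size;
}

The signed reinterpretation catches offsets with the top bit set, matching the signed_offset < 0 check in the patch.
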
fmt::format("{}", offset.U32() / 16) : fmt::format("{}>>4", offset_var)}; @@ -124,7 +133,14 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { + static constexpr u32 cbuf_size{4096 * 16}; const u32 u32_offset{offset.U32()}; + const s32 signed_offset{static_cast(offset.U32())}; + if (signed_offset < 0 || u32_offset > cbuf_size) { + // LOG_WARNING(..., "Immediate constant buffer offset is out of bounds"); + ctx.AddU32x2("{}=uvec2(0u);", inst); + return; + } if (u32_offset % 2 == 0) { ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); From 427a2596a1de1875fd2c4d483cea482b80c986b2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 22:43:43 -0400 Subject: [PATCH 666/770] glsl: Fix Cbuf getters for F32 type --- .../glsl/emit_glsl_context_get_set.cpp | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 8223ad862..96296ad28 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -34,16 +34,16 @@ std::string OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; } -void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, - u32 num_bits, std::string_view cast = {}, bool component_indexing_bug = false, - std::string_view bit_offset = {}) { +void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, + const IR::Value& offset, u32 num_bits, std::string_view cast = {}, + bool component_indexing_bug = false, std::string_view bit_offset = {}) { const bool is_immediate{offset.IsImmediate()}; if (is_immediate) { const s32 signed_offset{static_cast(offset.U32())}; static constexpr u32 cbuf_size{4096 * 16}; if (signed_offset < 0 || offset.U32() > cbuf_size) { // LOG_WARNING(..., "Immediate constant buffer offset is out of bounds"); - ctx.AddU32("{}=0u;", inst); + ctx.Add("{}=0u;", ret); return; } } @@ -60,10 +60,9 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const I bit_offset, num_bits)}; if (!component_indexing_bug) { const auto result{fmt::format(extraction, swizzle)}; - ctx.AddU32("{}={};", inst, result); + ctx.Add("{}={};", ret, result); return; } - const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; for (u32 i = 0; i < 4; ++i) { const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; @@ -74,26 +73,28 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const I void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (offset.IsImmediate()) { const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; - GetCbuf(ctx, inst, binding, offset, 8, cast, false, bit_offset); + GetCbuf(ctx, ret, binding, offset, 8, cast, false, bit_offset); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; - GetCbuf(ctx, inst, binding, 
offset, 8, cast, ctx.profile.has_gl_component_indexing_bug, + GetCbuf(ctx, ret, binding, offset, 8, cast, ctx.profile.has_gl_component_indexing_bug, bit_offset); } } void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view cast) { + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (offset.IsImmediate()) { const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; - GetCbuf(ctx, inst, binding, offset, 16, cast, false, bit_offset); + GetCbuf(ctx, ret, binding, offset, 16, cast, false, bit_offset); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; - GetCbuf(ctx, inst, binding, offset, 16, cast, ctx.profile.has_gl_component_indexing_bug, + GetCbuf(ctx, ret, binding, offset, 16, cast, ctx.profile.has_gl_component_indexing_bug, bit_offset); } } @@ -121,12 +122,14 @@ void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf(ctx, inst, binding, offset, 32, "ftou"); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + GetCbuf(ctx, ret, binding, offset, 32, "ftou"); } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf(ctx, inst, binding, offset, 32); + const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; + GetCbuf(ctx, ret, binding, offset, 32); } void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, From 39c29664f9aff5069c3a06435b1430db9903ff86 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 01:36:33 -0400 Subject: [PATCH 667/770] glsl: Minor cleanup --- .../backend/glsl/emit_glsl_convert.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 32 ++++++++----------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp index c9f173e2f..eeae6562c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp @@ -42,7 +42,7 @@ void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I } void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=int64_t(double({}));", inst, value); + ctx.AddU64("{}=int64_t({});", inst, value); } void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 6a98f7ac2..51181d1c1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -45,7 +45,7 @@ std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info } } -std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInfo& info) { +std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { switch (info.type) { case TextureType::Color1D: case TextureType::Buffer: @@ -407,13 +407,13 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, if (!sparse_inst) { if (!offset.empty()) { ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, - TexelFetchCastToInt(coords, info), lod, 
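
The index, swizzle, and bit-offset expressions threaded through these helpers all come from the same sub-word addressing scheme. A worked example for a 16-bit load at byte offset 6, with the constants mirroring the format strings in the patch (struct and function names are illustrative only):

#include <cstdint>

struct CbufU16Addr {
    std::uint32_t vec4_index; // which uvec4 of the uniform block array
    char swizzle;             // which 32-bit component of that vec4
    std::uint32_t bit_offset; // where the 16-bit field starts
};

constexpr CbufU16Addr AddressU16(std::uint32_t byte_offset) {
    return {
        byte_offset / 16,              // 16 bytes per vec4
        "xyzw"[(byte_offset / 4) % 4], // 4 bytes per component
        ((byte_offset / 2) % 2) * 16,  // low or high half-word
    };
}

// AddressU16(6) == {0, 'y', 16}, so the emitted GLSL becomes:
//   bitfieldExtract(ftou(cbuf[0].y), int(16), 16)
static_assert(AddressU16(6).vec4_index == 0 && AddressU16(6).bit_offset == 16);
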
TexelFetchCastToInt(offset, info)); + CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); } else { if (info.type == TextureType::Buffer) { ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); } else { ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, - TexelFetchCastToInt(coords, info), lod); + CoordsCastToInt(coords, info), lod); } } return; @@ -498,29 +498,28 @@ void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, throw NotImplementedException("EmitImageRead Sparse"); } const auto image{Image(ctx, info, index)}; - ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); + ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); } void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); + ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); } void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), - value); + ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); } void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), + ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), value); } @@ -528,7 +527,7 @@ void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& in std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), + ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), value); } @@ -536,7 +535,7 @@ void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& in std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), + ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), value); } @@ -544,7 +543,7 @@ void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& in std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), + ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), value); } @@ -562,31 +561,28 @@ void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& ind std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, 
TexelFetchCastToInt(coords, info), - value); + ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); } void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), - value); + ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); } void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), - value); + ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value); } void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; const auto image{Image(ctx, info, index)}; - ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, TexelFetchCastToInt(coords, info), + ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), value); } From 413eb6983f07bb4139cd07c5dca22bdb30e6af2d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:06:11 -0400 Subject: [PATCH 668/770] gl_shader_cache: Move OGL shader compilation to the respective Pipeline constructor --- .../renderer_opengl/gl_compute_pipeline.cpp | 13 +++- .../renderer_opengl/gl_compute_pipeline.h | 2 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 67 +++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 4 +- .../renderer_opengl/gl_shader_cache.cpp | 69 +++---------------- 5 files changed, 79 insertions(+), 76 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index a40106c87..f984b635c 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -39,10 +40,16 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + const std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + if (device.UseAssemblyShaders()) { + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } std::copy_n(info.constant_buffer_used_sizes.begin(), 
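
For readers outside the codebase: the AttachShader and LinkProgram helpers this constructor now calls reduce to the standard GL object API. A sketch of that reduction, assuming a current context and a glad loader; yuzu's actual helpers add logging and error reporting on top:

#include <glad/glad.h>
#include <string>

GLuint BuildComputeProgram(const std::string& code) {
    const GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
    const char* src = code.c_str();
    glShaderSource(shader, 1, &src, nullptr);
    glCompileShader(shader);

    const GLuint program = glCreateProgram();
    glAttachShader(program, shader);
    glLinkProgram(program);
    glDeleteShader(shader); // the linked program keeps what it needs

    GLint linked = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    // Real code would fetch the info log on failure; omitted here.
    return linked == GL_TRUE ? program : 0;
}
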
uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5dfb65e9..a93166eb6 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + const std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a2ea35d5a..4d62d7062 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -9,6 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/texture_cache/texture_cache.h" @@ -33,6 +34,40 @@ u32 AccumulateCount(const Range& range) { return num; } +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -82,19 +117,33 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{assembly_sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{glsl_sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 508fad5bb..984bf994f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4fcf4e458..884739aec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -56,40 +56,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -GLenum Stage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_SHADER; - case 1: - return GL_TESS_CONTROL_SHADER; - case 2: - return GL_TESS_EVALUATION_SHADER; - case 3: - return GL_GEOMETRY_SHADER; - case 4: - return GL_FRAGMENT_SHADER; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - -GLenum AssemblyStage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_PROGRAM_NV; - case 1: - return GL_TESS_CONTROL_PROGRAM_NV; - case 2: - return GL_TESS_EVALUATION_PROGRAM_NV; - case 3: - return GL_GEOMETRY_PROGRAM_NV; - case 4: - return GL_FRAGMENT_PROGRAM_NV; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { @@ -426,12 +392,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_programs; + std::array assembly_sources; + std::array glsl_sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; - if (!use_glasm) { - source_program.handle = glCreateProgram(); - } const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -446,20 +410,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; - assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - AttachShader(Stage(stage_index), source_program.handle, code); + glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } - if (!use_glasm) { - LinkProgram(source_program.handle); - } return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -496,21 +454,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - - OGLAssemblyProgram asm_program; - OGLProgram source_program; - if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, info, program)}; - asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { - const auto code{EmitGLSL(profile, program)}; - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); - } + const std::string code{device.UseAssemblyShaders() ? 
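
On the assembly path, CompileProgram targets NV program objects rather than GLSL shader objects. Presumably it boils down to the ARB program entry points that the GL_NV_gpu_program5 family reuses; a sketch under that assumption, with error handling omitted:

#include <glad/glad.h>
#include <string_view>

GLuint LoadAssemblyProgram(std::string_view code, GLenum stage) {
    GLuint program = 0;
    glGenProgramsARB(1, &program);
    glBindProgramARB(stage, program); // e.g. GL_COMPUTE_PROGRAM_NV
    glProgramStringARB(stage, GL_PROGRAM_FORMAT_ASCII_ARB,
                       static_cast<GLsizei>(code.size()), code.data());
    return program;
}
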
EmitGLASM(profile, info, program) + : EmitGLSL(profile, program)}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, - std::move(source_program), std::move(asm_program)); + kepler_compute, program_manager, program.info, code); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; From ff3de0fb6bb46bcb59421cef203ca8e8daaec85c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:11:13 -0400 Subject: [PATCH 669/770] gl_shader_cache: Remove const from pipeline source arguments --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index f984b635c..2d6442d74 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -40,7 +40,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code) + std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { if (device.UseAssemblyShaders()) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index a93166eb6..b5fc45f26 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code); + std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 4d62d7062..d64723d6b 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -117,8 +117,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 984bf994f..dc791be53 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, 
ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); From 6aa1bf7b6ff86bb7325e5b50709ddf5477b1e855 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 20:14:56 -0400 Subject: [PATCH 670/770] glsl: Implement legacy varyings --- .../backend/glsl/emit_context.cpp | 43 ++++++++++++++++++- .../backend/glsl/emit_context.h | 2 +- .../backend/glsl/emit_glsl.cpp | 2 +- .../glsl/emit_glsl_context_get_set.cpp | 31 +++++++++++-- .../ir_opt/collect_shader_info_pass.cpp | 9 +++- src/shader_recompiler/shader_info.h | 2 + 6 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ed10eca8a..f0e9dffc2 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -200,6 +200,27 @@ std::string_view OutputPrimitive(OutputTopology topology) { throw InvalidArgument("Invalid output topology {}", topology); } +void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { + if (!ctx.info.stores_legacy_varyings) { + return; + } + if (ctx.info.stores_fixed_fnc_textures) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.stores_color_front_diffuse) { + header += "vec4 gl_FrontColor;"; + } + if (ctx.info.stores_color_front_specular) { + header += "vec4 gl_FrontSecondaryColor;"; + } + if (ctx.info.stores_color_back_diffuse) { + header += "vec4 gl_BackColor;"; + } + if (ctx.info.stores_color_back_specular) { + header += "vec4 gl_BackSecondaryColor;"; + } +} + void SetupOutPerVertex(EmitContext& ctx, std::string& header) { if (!StoresPerVertexAttributes(ctx.stage)) { return; @@ -215,18 +236,34 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } + SetupLegacyOutPerVertex(ctx, header); header += "};"; if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } + +void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { + if (!ctx.info.loads_legacy_varyings) { + return; + } + header += "in gl_PerFragment{"; + if (ctx.info.loads_fixed_fnc_textures) { + header += "vec4 gl_TexCoord[8];"; + } + if (ctx.info.loads_color_front_diffuse) { + header += "vec4 gl_Color;"; + } + header += "};"; +} + } // Anonymous namespace EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { header += "#pragma optionNV(fastmath off)\n"; - SetupExtensions(header); + SetupExtensions(); stage = program.stage; switch (program.stage) { case Stage::VertexA: @@ -271,6 +308,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } SetupOutPerVertex(*this, header); + SetupLegacyInPerFragment(*this, header); + for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { @@ -306,7 +345,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } -void EmitContext::SetupExtensions(std::string&) { +void EmitContext::SetupExtensions() { if 
(profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 685f56089..8fa87c02c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -157,7 +157,7 @@ public: bool uses_cc_carry{}; private: - void SetupExtensions(std::string& header); + void SetupExtensions(); void DefineConstantBuffers(Bindings& bindings); void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 3e6add7cd..d76b63b2d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -166,7 +166,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction) { + if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 96296ad28..3eeccfb3c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -98,6 +98,10 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const bit_offset); } } + +u32 TexCoordIndex(IR::Attribute attr) { + return (static_cast(attr) - static_cast(IR::Attribute::FixedFncTexture0S)) / 4; +} } // Anonymous namespace void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -178,6 +182,17 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + // LOG_WARNING(..., "GLSL does not allow access to gl_TexCoord[{}]", TexCoordIndex(attr)); + ctx.AddF32("{}=0.f;", inst); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); + return; + } switch (attr) { case IR::Attribute::PrimitiveId: ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); @@ -227,19 +242,29 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val [[maybe_unused]] std::string_view vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const u32 element{IR::GenericAttributeElement(attr)}; - const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + const u32 attr_element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; const auto output_decorator{OutputVertexIndex(ctx)}; if (info.num_components == 1) { ctx.Add("{}{}={};", info.name, output_decorator, value); } else { - const u32 index_element{element - info.first_element}; + const u32 index_element{attr_element - info.first_element}; ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); } return; } const u32 element{static_cast(attr) % 4}; const char 
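
The redeclared gl_TexCoord, gl_FrontColor, and related members only exist in the GLSL compatibility profile, which is why the version specifier gains " compatibility" whenever legacy varyings are stored. Written out as plain GLSL, the interface being targeted looks roughly like the illustrative shader below (not something the backend emits verbatim):

constexpr const char* kLegacyVaryingExample = R"(#version 460 compatibility
void main() {
    gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
    gl_FrontColor = vec4(1.0);                 // read as gl_Color in fragment
    gl_TexCoord[0] = vec4(0.5, 0.5, 0.0, 1.0); // only indices 0..7 exist
}
)";
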
swizzle{"xyzw"[element]}; + // GLSL only exposes 8 legacy texcoords + if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { + // LOG_WARNING(..., "GLSL does not allow access to gl_TexCoord[{}]", TexCoordIndex(attr)); + return; + } + if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { + const u32 index{TexCoordIndex(attr)}; + ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); + return; + } switch (attr) { case IR::Attribute::Layer: if (ctx.stage != Stage::Geometry && diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index dc78cdefb..10d2822ae 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -35,6 +35,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { info.loads_fixed_fnc_textures = true; + info.loads_legacy_varyings = true; return; } switch (attr) { @@ -52,6 +53,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: info.loads_color_front_diffuse = true; + info.loads_legacy_varyings = true; break; case IR::Attribute::PointSpriteS: case IR::Attribute::PointSpriteT: @@ -82,6 +84,7 @@ void SetAttribute(Info& info, IR::Attribute attr) { } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { info.stores_fixed_fnc_textures = true; + info.stores_legacy_varyings = true; return; } switch (attr) { @@ -105,24 +108,28 @@ void SetAttribute(Info& info, IR::Attribute attr) { case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: info.stores_color_front_diffuse = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: info.stores_color_front_specular = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: case IR::Attribute::ColorBackDiffuseB: case IR::Attribute::ColorBackDiffuseA: info.stores_color_back_diffuse = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ColorBackSpecularR: case IR::Attribute::ColorBackSpecularG: case IR::Attribute::ColorBackSpecularB: case IR::Attribute::ColorBackSpecularA: - info.stores_color_front_specular = true; + info.stores_color_back_specular = true; + info.stores_legacy_varyings = true; break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9f7f0b42c..7536c9caf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -128,6 +128,7 @@ struct Info { bool loads_instance_id{}; bool loads_vertex_id{}; bool loads_front_face{}; + bool loads_legacy_varyings{}; bool loads_tess_coord{}; @@ -150,6 +151,7 @@ struct Info { bool stores_clip_distance{}; bool stores_fog_coordinate{}; bool stores_viewport_mask{}; + bool stores_legacy_varyings{}; bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; From 5e7b2b9661bf685c3950d7c4065d0d35b488f95c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 00:05:19 -0400 Subject: [PATCH 671/770] 
glsl: Add stubs for sparse queries and variable aoffi when not supported --- .../backend/glsl/emit_context.cpp | 2 +- .../backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 46 ++++++++++++++----- src/shader_recompiler/profile.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++ .../renderer_opengl/gl_shader_cache.cpp | 2 + 7 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index f0e9dffc2..d0880bdcb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -384,7 +384,7 @@ void EmitContext::SetupExtensions() { profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } - if (info.uses_sparse_residency) { + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } if (info.stores_viewport_mask && profile.support_viewport_mask) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index d76b63b2d..6d64913bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -215,7 +215,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); } - ctx.header += "\nvoid main(){\n"; + ctx.header += "void main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; // TODO: Properly resolve attribute issues diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 51181d1c1..c6b3df9c9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -94,7 +94,11 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { break; } } - const auto offset_str{ctx.var_alloc.Consume(offset)}; + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + // LOG_WARNING("Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; switch (offset.Type()) { case IR::Type::U32: return fmt::format("int({})", offset_str); @@ -146,7 +150,12 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -163,7 +172,6 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); @@ -186,7 +194,12 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, GetOffsetVec(ctx, offset)); @@ -195,7 +208,6 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, @@ -315,7 +327,12 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, info.gather_component); @@ -332,7 +349,6 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, info.gather_component); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", *sparse_inst, texture, coords, texel, info.gather_component); @@ -358,7 +374,12 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); return; @@ -373,7 +394,6 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, texture, coords, dref, texel); @@ -404,7 +424,12 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.empty()) { ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); @@ -418,7 +443,6 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } return; } - // TODO: Query sparseTexels extension support if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 246995190..236c79a0a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -87,6 +87,8 @@ struct Profile { bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; bool support_gl_warp_intrinsics{}; + bool support_gl_variable_aoffi{}; + bool support_gl_sparse_textures{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 20ea42cff..bf08a6d93 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ff0ff2b08..0b59c9df0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool HasSparseTexture2() const { + return has_sparse_texture_2; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -165,6 +169,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 884739aec..3d59d34d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), From a0d0704affa0f86ba29ef59d90fa06c1b7c974da Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 19:12:03 -0400 Subject: [PATCH 672/770] glsl: Conditionally add EXT_texture_shadow_lod --- src/shader_recompiler/backend/glsl/emit_context.cpp | 8 +++++--- .../ir_opt/collect_shader_info_pass.cpp | 10 +++++++++- src/shader_recompiler/shader_info.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index d0880bdcb..e18f8257e 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -302,9 +302,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Compute: stage_name = "cs"; + const u32 local_x{std::max(program.workgroup_size[0], 1u)}; + const u32 local_y{std::max(program.workgroup_size[1], 1u)}; + const u32 local_z{std::max(program.workgroup_size[2], 1u)}; header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", - program.workgroup_size[0], program.workgroup_size[1], - program.workgroup_size[2]); + local_x, local_y, local_z); break; } SetupOutPerVertex(*this, header); @@ -346,7 +348,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } void EmitContext::SetupExtensions() { - if (profile.support_gl_texture_shadow_lod) { + if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } if (info.uses_int64) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 10d2822ae..47933df97 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -636,7 +636,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: - case IR::Opcode::ImageQueryLod: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; @@ -644,6 +643,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageQueryLod: { + const auto flags{inst.Flags()}; + const TextureType type{flags.type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; + info.uses_shadow_lod |= flags.is_depth != 0; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } case IR::Opcode::ImageRead: { const 
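
The std::max(..., 1u) clamp earlier in this patch exists because GLSL rejects a zero local size, layout(local_size_x = 0) is a compile error, and it presumably guards against a zero dimension arriving from the translated program. Reduced to a sketch:

#include <algorithm>
#include <cstdint>

// Each axis of a compute workgroup must be at least 1 in GLSL.
std::uint32_t ClampWorkgroupDim(std::uint32_t dim) {
    return std::max(dim, std::uint32_t{1});
}
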
auto flags{inst.Flags()}; info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 7536c9caf..74d7a6a94 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -196,6 +196,7 @@ struct Info { bool uses_int64_bit_atomics{}; bool uses_global_memory{}; bool uses_atomic_image_u32{}; + bool uses_shadow_lod{}; IR::Type used_constant_buffer_types{}; IR::Type used_storage_buffer_types{}; From 6c6a451d6afdfe02dc24b06f54b2a183f73a04fb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 11:27:37 -0400 Subject: [PATCH 673/770] glsl: Add LoopSafety instructions --- .../backend/glsl/emit_glsl_instructions.h | 2 ++ .../backend/glsl/emit_glsl_not_implemented.cpp | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 0c717664f..9e812dabb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -47,6 +47,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index f420fe388..0a28a1ffc 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -46,6 +46,14 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } +void EmitSetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } From ae4e452759573d145738688d9284077934e61ae4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 11:32:28 -0400 Subject: [PATCH 674/770] glsl: Add Shader_GLSL logging --- .../backend/glsl/emit_glsl_atomic.cpp | 13 +++++----- .../glsl/emit_glsl_context_get_set.cpp | 23 ++++++++++-------- .../backend/glsl/emit_glsl_image.cpp | 24 ++++++++++--------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 850eee1e1..9152ace98 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, 
pointer_offset); ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", @@ -171,7 +171,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // LOG_WARNING(..., "Op falling to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -182,7 +182,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // LOG_WARNING(..., "Op falling to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -195,7 +195,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // LOG_WARNING(..., "Op falling to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -207,7 +207,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // LOG_WARNING(..., "Op falling to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -220,8 +220,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - // LOG_WARNING(..., "Op falling to non-atomic"); - + LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3eeccfb3c..0d1e5ed7f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -42,7 +42,7 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const s32 signed_offset{static_cast(offset.U32())}; static constexpr u32 cbuf_size{4096 * 16}; if (signed_offset < 0 || 
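
These warnings all flag the same fallback: without 64-bit atomics, the value is assembled and written back as two 32-bit words, so concurrent invocations can interleave and the sequence is observably non-atomic. Its GLSL shape, condensed and illustrative, assuming GL_ARB_gpu_shader_int64 for the pack intrinsics:

constexpr const char* kInt64AtomicFallbackSketch = R"(
uint64_t old = packUint2x32(uvec2(ssbo[idx], ssbo[idx + 1]));
uint64_t result = old + value;            // the intended atomic op
ssbo[idx]     = unpackUint2x32(result).x; // racy two-word write-back
ssbo[idx + 1] = unpackUint2x32(result).y;
)";
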
offset.U32() > cbuf_size) { - // LOG_WARNING(..., "Immediate constant buffer offset is out of bounds"); + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); ctx.Add("{}=0u;", ret); return; } @@ -144,7 +144,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const u32 u32_offset{offset.U32()}; const s32 signed_offset{static_cast(offset.U32())}; if (signed_offset < 0 || u32_offset > cbuf_size) { - // LOG_WARNING(..., "Immediate constant buffer offset is out of bounds"); + LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); ctx.AddU32x2("{}=uvec2(0u);", inst); return; } @@ -184,7 +184,8 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } // GLSL only exposes 8 legacy texcoords if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { - // LOG_WARNING(..., "GLSL does not allow access to gl_TexCoord[{}]", TexCoordIndex(attr)); + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); ctx.AddF32("{}=0.f;", inst); return; } @@ -257,7 +258,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val const char swizzle{"xyzw"[element]}; // GLSL only exposes 8 legacy texcoords if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { - // LOG_WARNING(..., "GLSL does not allow access to gl_TexCoord[{}]", TexCoordIndex(attr)); + LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", + TexCoordIndex(attr)); return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { @@ -269,8 +271,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::Layer: if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_index_layer_non_geometry) { - // LOG_WARNING(..., "Shader stores viewport layer but device does not support viewport - // layer extension"); + LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " + "viewport layer extension"); break; } ctx.Add("gl_Layer=ftoi({});", value); @@ -278,16 +280,17 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::ViewportIndex: if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_index_layer_non_geometry) { - // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport - // layer extension"); + LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " + "viewport layer extension"); break; } ctx.Add("gl_ViewportIndex=ftoi({});", value); break; case IR::Attribute::ViewportMask: if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { - // LOG_WARNING(..., "Shader stores viewport mask but device does not support viewport - // mask extension"); + LOG_WARNING( + Shader_GLSL, + "Shader stores viewport mask but device does not support viewport mask extension"); break; } ctx.Add("gl_ViewportMask[0]=ftoi({});", value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index c6b3df9c9..447eb8e0a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -96,7 +96,7 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { } const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; if 
(!has_var_aoffi) { - // LOG_WARNING("Device does not support variable texture offsets, STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); } const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; switch (offset.Type()) { @@ -116,7 +116,7 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { - // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); + LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; } const IR::Opcode opcode{values[0]->GetOpcode()}; @@ -152,7 +152,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto sparse_inst{PrepareSparse(inst)}; const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; if (sparse_inst && !supports_sparse) { - // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { @@ -196,7 +196,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto sparse_inst{PrepareSparse(inst)}; const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; if (sparse_inst && !supports_sparse) { - // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { @@ -239,9 +239,10 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && ctx.stage != Stage::Fragment && needs_shadow_ext}; if (use_grad) { - // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); if (info.type == TextureType::ColorArrayCube) { - // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); ctx.AddF32("{}=0.0f;", inst); return; } @@ -291,9 +292,10 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; if (use_grad) { - // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + LOG_WARNING(Shader_GLSL, + "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); if (info.type == TextureType::ColorArrayCube) { - // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. 
Stubbing"); ctx.AddF32("{}=0.0f;", inst); return; } @@ -329,7 +331,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto sparse_inst{PrepareSparse(inst)}; const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; if (sparse_inst && !supports_sparse) { - // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { @@ -376,7 +378,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde const auto sparse_inst{PrepareSparse(inst)}; const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; if (sparse_inst && !supports_sparse) { - // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { @@ -426,7 +428,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; if (sparse_inst && !supports_sparse) { - // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); ctx.AddU1("{}=true;", *sparse_inst); } if (!sparse_inst || !supports_sparse) { From 74f683787eeba7b6e8f5868134f445240733f8fd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 21:06:29 -0400 Subject: [PATCH 675/770] gl_shader_cache: Implement async shaders --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 137 ++++++++++-------- .../renderer_opengl/gl_graphics_pipeline.h | 14 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 54 ++++--- .../renderer_opengl/gl_shader_cache.h | 34 ++--- .../renderer_opengl/gl_shader_context.h | 33 +++++ 7 files changed, 161 insertions(+), 114 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_context.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1ef3a6189..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,6 +83,7 @@ add_library(video_core STATIC renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d64723d6b..d27a3cf46 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -6,11 +6,13 @@ #include #include "common/cityhash.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -117,74 +119,91 @@ 
GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{assembly_sources[stage]}; - if (code.empty()) { - continue; + auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += + AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += + AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + 
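// The constructor above wraps every build step in a lambda (`func`) that, as the
// branch just below shows, is either queued on a worker thread or run inline. A
// minimal, self-contained sketch of that queue-or-run-inline pattern (illustrative
// only: SimpleWorker is an assumed stand-in for Common::StatefulThreadWorker, not
// the patch's actual API):
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

class SimpleWorker {
public:
    SimpleWorker() : thread{[this] { Loop(); }} {}

    ~SimpleWorker() {
        {
            std::scoped_lock lock{mutex};
            stop = true;
        }
        condvar.notify_one();
        thread.join();
    }

    void QueueWork(std::function<void()> work) {
        {
            std::scoped_lock lock{mutex};
            tasks.push(std::move(work));
        }
        condvar.notify_one();
    }

private:
    void Loop() {
        for (;;) {
            std::function<void()> task;
            {
                std::unique_lock lock{mutex};
                condvar.wait(lock, [this] { return stop || !tasks.empty(); });
                if (stop && tasks.empty()) {
                    return;
                }
                task = std::move(tasks.front());
                tasks.pop();
            }
            task(); // build the pipeline off the main thread
        }
    }

    std::mutex mutex;
    std::condition_variable condvar;
    std::queue<std::function<void()>> tasks;
    bool stop{};
    std::thread thread; // declared last so the queue state exists before the thread starts
};

// Usage mirrors the constructor: queue when a worker exists, otherwise build synchronously.
void Build(SimpleWorker* worker, std::function<void()> func) {
    if (worker) {
        worker->QueueWork(std::move(func));
    } else {
        func();
    }
}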
writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } + is_built.store(true, std::memory_order_relaxed); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{glsl_sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } - LinkProgram(program.handle); - } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + func(nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index dc791be53..58deafd3c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,10 +20,15 @@ namespace OpenGL { +namespace ShaderContext { +struct Context; +} + class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker; struct GraphicsPipelineKey { std::array unique_hashes; @@ -65,8 +70,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); @@ -82,6 +87,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + private: void GenerateTransformFeedbackState(const 
VideoCommon::TransformFeedbackState& xfb_state); @@ -108,6 +117,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7513bd071..e3d336f86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, - buffer_cache, program_manager, state_tracker), + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d59d34d7..d082b9f73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -17,7 +17,6 @@ #include "common/scope_exit.h" #include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; template auto MakeSpan(Container& container) { @@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace -struct ShaderCache::Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; -}; - ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_) + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, - use_asynchronous_shaders{device.UseAsynchronousShaders()}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, profile{ .supported_spirv = 0x00010000, @@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), 
false)}; std::lock_guard lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (is_new) { program = CreateGraphicsPipeline(); } + if (!program || !program->IsBuilt()) { + return nullptr; + } return program.get(); } @@ -339,7 +334,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; if (!pipeline || shader_cache_filename.empty()) { return pipeline; } @@ -354,8 +350,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs) try { + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -394,8 +390,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_sources; - std::array glsl_sources; + std::array sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; @@ -412,14 +407,16 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -442,9 +439,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( return pipeline; } -std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineKey& key, - Shader::Environment& env) try { +std::unique_ptr ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -465,11 +462,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } -std::unique_ptr> ShaderCache::CreateWorkers() - const { - return std::make_unique>( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); +std::unique_ptr ShaderCache::CreateWorkers() const { + return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0c5a06d8..d24b54d90 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -13,13 +13,12 @@ #include "common/common_types.h" #include "common/thread_worker.h" -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -31,29 +30,17 @@ namespace OpenGL { class Device; class ProgramManager; class RasterizerOpenGL; - -struct ShaderPools { - void ReleaseContents() { - flow_block.ReleaseContents(); - block.ReleaseContents(); - inst.ReleaseContents(); - } - - Shader::ObjectPool inst; - Shader::ObjectPool block; - Shader::ObjectPool flow_block; -}; +using ShaderWorker = Common::StatefulThreadWorker; class ShaderCache : public VideoCommon::ShaderCache { - struct Context; - public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); ~ShaderCache(); void LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -67,17 +54,17 @@ private: std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs); + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputePipeline(ShaderPools& pools, + std::unique_ptr CreateComputePipeline(ShaderContext::ShaderPools& 
pools, const ComputePipelineKey& key, Shader::Environment& env); - std::unique_ptr> CreateWorkers() const; + std::unique_ptr CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -85,17 +72,18 @@ private: BufferCache& buffer_cache; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineKey graphics_key{}; const bool use_asynchronous_shaders; - ShaderPools main_pools; + ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; Shader::Profile profile; std::filesystem::path shader_cache_filename; - std::unique_ptr> workers; + std::unique_ptr workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext From 6eea88d6149f7122777b325c7fc8549e2a974e64 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:02:07 -0400 Subject: [PATCH 676/770] glsl: Cleanup/Address feedback --- .../backend/glsl/emit_glsl.cpp | 8 ++++---- .../backend/glsl/emit_glsl_atomic.cpp | 12 ++++++------ .../backend/glsl/emit_glsl_composite.cpp | 3 +-- .../backend/glsl/emit_glsl_context_get_set.cpp | 18 +++++------------- .../backend/glsl/emit_glsl_integer.cpp | 1 + .../backend/glsl/emit_glsl_shared_memory.cpp | 1 + .../backend/glsl/emit_glsl_warp.cpp | 2 +- .../backend/glsl/var_alloc.cpp | 3 +-- src/shader_recompiler/profile.h | 2 ++ .../renderer_opengl/gl_shader_cache.cpp | 2 ++ 10 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 6d64913bb..9f8cf659f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -156,8 +156,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if({}){{continue;}}else{{break;}}}}", - ctx.var_alloc.Consume(node.data.repeat.cond)); + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); break; default: throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); @@ -166,7 +165,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings) { + if (ctx.uses_y_direction || 
ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { return " compatibility"; } return ""; @@ -187,7 +186,8 @@ void DefineVariables(const EmitContext& ctx, std::string& header) { const auto type{static_cast(i)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto type_name{ctx.var_alloc.GetGlslType(type)}; - const auto precise{IsPreciseType(type) ? "precise " : ""}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; // Temps/return types that are never used are stored at index 0 if (tracker.uses_temp) { header += fmt::format("{}{} t{}={}(0);", precise, type_name, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 9152ace98..772acc5a4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, pointer_offset); ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", @@ -171,7 +171,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -182,7 +182,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -195,7 +195,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -207,7 +207,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void 
EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -220,7 +220,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 7421ce97d..49a66e3ec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -17,8 +17,7 @@ void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view // The result is aliased with the composite ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } else { - ctx.Add("{}={};", result, composite); - ctx.Add("{}.{}={};", result, SWIZZLE[index], object); + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0d1e5ed7f..edeecc26e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -30,7 +30,7 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; } -std::string OutputVertexIndex(EmitContext& ctx) { +std::string_view OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? 
"[gl_InvocationID]" : ""; } @@ -40,7 +40,7 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const bool is_immediate{offset.IsImmediate()}; if (is_immediate) { const s32 signed_offset{static_cast(offset.U32())}; - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; if (signed_offset < 0 || offset.U32() > cbuf_size) { LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); ctx.Add("{}=0u;", ret); @@ -140,7 +140,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; const u32 u32_offset{offset.U32()}; const s32 signed_offset{static_cast(offset.U32())}; if (signed_offset < 0 || u32_offset > cbuf_size) { @@ -308,21 +308,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::ColorFrontDiffuseG: case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_Color.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontColor.{}={};", swizzle, value); break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_SecondaryColor.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 7a2f79d10..983e6d95d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -28,6 +28,7 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { sign->Invalidate(); } } // Anonymous namespace + void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 8a13bf617..518b78f06 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -20,6 +20,7 @@ void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); } } // Anonymous namespace + void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 7047928fd..4d418cbbc 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -43,7 +43,7 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& 
inst, std::string_view shfl_op, ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); SetInBoundsFlag(ctx, inst); } -} // namespace +} // Anonymous namespace void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index cbf56491c..194f926ca 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -177,8 +177,7 @@ Id VarAlloc::Alloc(GlslVarType type) { void VarAlloc::Free(Id id) { if (id.is_valid == 0) { - // throw LogicError("Freeing invalid variable"); - return; + throw LogicError("Freeing invalid variable"); } auto& use_tracker{GetUseTracker(id.type)}; use_tracker.var_use[id.index] = false; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 236c79a0a..6db794e91 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -105,6 +105,8 @@ struct Profile { bool has_broken_signed_operations{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; + /// The precise type qualifier is broken in the fragment stage of some drivers + bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d082b9f73..5ffe28d45 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -196,6 +196,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, } { if (use_asynchronous_shaders) { From 3b339fbbf65a50ec2ec8baacd175ca7577c3b8bd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:33:26 -0400 Subject: [PATCH 677/770] glsl: Conditionally use fine/coarse derivatives based on device support --- .../backend/glsl/emit_context.cpp | 3 ++ .../backend/glsl/emit_glsl_warp.cpp | 28 ++++++++++++++++--- src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e18f8257e..0e8fe017d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -395,6 +395,9 @@ void EmitContext::SetupExtensions() { if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 4d418cbbc..a982dd8a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -180,18 
+180,38 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, st } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6db794e91..e8cfc03af 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -89,6 +89,7 @@ struct Profile { bool support_gl_warp_intrinsics{}; bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; + bool support_gl_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5ffe28d45..fedbce2f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), From c5dfa0b6308fa0f79b3323204b6c61d38d43ff87 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 00:30:59 -0400 Subject: [PATCH 678/770] glsl: Move gl_Position/generic attribute initialization to EmitPrologue --- .../backend/glsl/emit_glsl.cpp | 9 --------- .../backend/glsl/emit_glsl_special.cpp | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 9f8cf659f..ff869923f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -216,15 +216,6 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); }
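// The EmitDPdxFine/EmitDPdyFine/EmitDPdxCoarse/EmitDPdyCoarse emitters in patch 677
// above all share one capability-gated shape: emit the precise variant when
// GL_ARB_derivative_control is available, otherwise warn and fall back to the base
// intrinsic. A condensed sketch of that selection (hypothetical helper, not part of
// the patch; fmt usage assumed from the surrounding code):
#include <string>
#include <string_view>
#include <fmt/format.h>

// base is "dFdx" or "dFdy", variant is "Fine" or "Coarse", operand is a GLSL expression.
std::string DerivativeCall(bool support_derivative_control, std::string_view base,
                           std::string_view variant, std::string_view operand) {
    if (support_derivative_control) {
        return fmt::format("{}{}({})", base, variant, operand); // e.g. "dFdxFine(x)"
    }
    // Unqualified derivatives are always available in fragment shaders.
    return fmt::format("{}({})", base, operand); // e.g. "dFdx(x)"
}

// DerivativeCall(true, "dFdx", "Fine", "in_attr0.x") yields "dFdxFine(in_attr0.x)"
// DerivativeCall(false, "dFdy", "Coarse", "in_attr0.y") yields "dFdy(in_attr0.y)"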
ctx.header += "void main(){\n"; - if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { - ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < program.info.stores_generics.size() / 2; ++index) { - if (!program.info.stores_generics[index]) { - ctx.header += fmt::format("out_attr{}=vec4(0,0,0,1);", index); - } - } - } DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { ctx.header += "uint carry;"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 2155b8802..cfef58d79 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { @@ -42,13 +43,19 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& ctx.Add("{}={};", phi_reg, val_reg); } -void EmitPrologue(EmitContext&) { - // TODO +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { + ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { + if (!ctx.info.stores_generics[index]) { + ctx.Add("out_attr{}=vec4(0,0,0,1);", index); + } + } + } } -void EmitEpilogue(EmitContext&) { - // TODO -} +void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: [PATCH 679/770] glsl: Address rest of feedback --- .../backend/glsl/emit_context.cpp | 44 ++++++++++++++----- .../backend/glsl/emit_context.h | 2 + .../glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 22 ++++++---- src/shader_recompiler/ir_opt/texture_pass.cpp | 11 ++++- src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_graphics_pipeline.cpp | 32 +++++++------- .../renderer_opengl/gl_shader_cache.cpp | 1 + 11 files changed, 86 insertions(+), 38 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0e8fe017d..d224c4d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { } } +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + std::string_view GetTessMode(TessPrimitive primitive) { switch (primitive) { case TessPrimitive::Triangles: @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const 
RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - header += "#pragma optionNV(fastmath off)\n"; + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } SetupExtensions(); stage = program.stage; switch (program.stage) { @@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { + if (info.stores_generics[index] || StageInitializesVaryings()) { DefineGenericOutput(index, program.invocations); } } @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } +bool EmitContext::StageInitializesVaryings() const noexcept { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + return true; + default: + return false; + } +} + void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_atomic_float : enable\n"; } if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { - header += "#extension NV_shader_atomic_fp16_vector : enable\n"; + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { if (profile.support_gl_nv_gpu_shader_5) { @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { if (info.stores_viewport_mask && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } - if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { + if (info.uses_typeless_image_reads) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } if (info.uses_derivatives && profile.support_gl_derivative_control) { @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); } - write_func += "}"; - write_func_64 += "}"; - write_func_128 += "}"; + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; load_func += "return 0u;}"; load_func_64 += "return uvec2(0);}"; load_func_128 += "return uvec4(0);}"; @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { for (const auto& desc : info.image_buffer_descriptors) { image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, - format, bindings.image, array_decorator); + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); bindings.image += desc.count; } images.reserve(info.image_descriptors.size()); @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; - const auto qualifier{desc.is_written ? 
"" : "readonly "}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, qualifier, image_type, bindings.image, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..4a50556e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,6 +136,8 @@ public: code += '\n'; } + [[nodiscard]] bool StageInitializesVaryings() const noexcept; + std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index edeecc26e..a241d18fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::FogCoordinate: - ctx.Add("gl_FogFragCoord.x={};", value); + ctx.Add("gl_FogFragCoord={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index cfef58d79..59ca52f07 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -10,6 +10,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void InitializeVaryings(EmitContext& ctx) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { + if (!ctx.info.stores_generics[index]) { + ctx.Add("out_attr{}=vec4(0,0,0,1);", index); + } + } +} +} // Anonymous namespace void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { - ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { - ctx.Add("out_attr{}=vec4(0,0,0,1);", index); - } - } + if (ctx.StageInitializesVaryings()) { + InitializeVaryings(ctx); } } @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -312,11 +312,14 @@ public: } u32 Add(const ImageBufferDescriptor& desc) { - return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(image_buffer_descriptors, 
desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; } u32 Add(const TextureDescriptor& desc) { @@ -339,6 +342,7 @@ public: desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; return index; } @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { .type = flags.type, .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e8cfc03af..a3c412a0f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { /// Fragment outputs have to be declared even if they are not written to avoid undefined values. /// See Ori and the Blind Forest's main menu for reference. 
bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 74d7a6a94..e9ebc16a4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vectorMarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, From 12ef06ba8bca5b20069e24b36f9216d01d4fe904 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 21:01:44 -0400 Subject: [PATCH 680/770] glsl: Obey need_declared_frag_colors to declare and initialize all frag_color Fixes Ori and the Blind Forest title screen --- src/shader_recompiler/backend/glsl/emit_context.cpp | 2 +- src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index d224c4d84..54aa88b63 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -340,7 +340,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); } for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { - if (!info.stores_frag_color[index]) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { continue; } header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 59ca52f07..f8e8aaa67 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -58,6 +59,14 @@ void EmitPrologue(EmitContext& ctx) { if (ctx.StageInitializesVaryings()) { InitializeVaryings(ctx); } + if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { + for (size_t index = 0;
index < ctx.info.stores_frag_color.size(); ++index) { + if (ctx.info.stores_frag_color[index]) { + continue; + } + ctx.Add("frag_color{}=vec4(0,0,0,1);", index); + } + } } void EmitEpilogue(EmitContext&) {} From 69f9b97e7ed1e873657105cff27ed9095ee277ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 22:48:55 -0300 Subject: [PATCH 681/770] vulkan_device: Blacklist VK_EXT_vertex_input_dynamic_state on Intel --- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index bf063c047..9754abcf8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -491,6 +491,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); ext_extended_dynamic_state = false; } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; + } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: [PATCH 682/770] shader: Rename maxwell/program.h to translate_program.h --- src/shader_recompiler/CMakeLists.txt | 4 ++-- .../maxwell/{program.cpp => translate_program.cpp} | 2 +- .../frontend/maxwell/{program.h => translate_program.h} | 7 +------ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 5 files changed, 6 insertions(+), 11 deletions(-) rename src/shader_recompiler/frontend/maxwell/{program.cpp => translate_program.cpp} (99%) rename src/shader_recompiler/frontend/maxwell/{program.h => translate_program.h} (89%) diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 06ee50fff..f801a9f72 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -114,8 +114,6 @@ add_library(shader_recompiler STATIC frontend/maxwell/maxwell.inc frontend/maxwell/opcodes.cpp frontend/maxwell/opcodes.h - frontend/maxwell/program.cpp - frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -211,6 +209,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + frontend/maxwell/translate_program.cpp + frontend/maxwell/translate_program.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp similarity index 99% rename from src/shader_recompiler/frontend/maxwell/program.cpp rename to src/shader_recompiler/frontend/maxwell/translate_program.cpp index 8489f9a5f..e52170e3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -10,9 +10,9 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h similarity index 89% rename from src/shader_recompiler/frontend/maxwell/program.h rename to src/shader_recompiler/frontend/maxwell/translate_program.h index f7f5930e4..1e5536443 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -4,13 +4,8 @@ #pragma once -#include -#include -#include - -#include - #include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 620666622..c05cd5d28 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,7 +22,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b17f34cdd..0b6fe8e2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,7 +20,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/program_header.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: [PATCH 683/770] shader: Add support for native 16-bit floats --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 8 +++++--- .../frontend/maxwell/translate_program.h | 3 ++- src/shader_recompiler/host_translate_info.h | 18 ++++++++++++++++++ .../renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- .../renderer_opengl/gl_shader_cache.h | 3 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- .../renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 9 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/host_translate_info.h diff --git 
a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72..164e94071 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/translate.h frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.h + host_translate_info.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw 
.has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. 
- // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); From 0ffea97e2ea2c8f58928e13dc2488d620ea98ea8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:22:56 -0300 Subject: [PATCH 684/770] shader: Split profile and runtime info headers --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 1 + .../backend/glasm/emit_glasm.cpp | 1 + .../backend/glasm/emit_glasm.h | 1 + .../backend/glasm/emit_glasm_memory.cpp | 2 +- .../backend/glsl/emit_context.cpp | 1 + .../backend/glsl/emit_glsl.h | 1 + .../backend/glsl/emit_glsl_instructions.h | 3 - .../backend/spirv/emit_context.h | 1 + src/shader_recompiler/profile.h | 72 ---------------- src/shader_recompiler/runtime_info.h | 83 +++++++++++++++++++ .../renderer_opengl/gl_shader_cache.h | 1 + src/video_core/transform_feedback.h | 2 +- 13 files changed, 93 insertions(+), 77 deletions(-) create mode 100644 src/shader_recompiler/runtime_info.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 164e94071..f6719ad9d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -226,6 +226,7 @@ add_library(shader_recompiler STATIC object_pool.h profile.h program_header.h + runtime_info.h shader_info.h ) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 08918a5c2..21e14867c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 832b4fd40..66e4aea04 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3df32a4a6..bcb55f062 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index cafb5c92a..af9fac7c1 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,7 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" -#include 
"shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 54aa88b63..93057ebb9 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h index fe221fa7c..20e5719e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9e812dabb..df28036e4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -18,9 +18,6 @@ class Value; namespace Shader::Backend::GLSL { class EmitContext; -inline void EmitSetLoopSafetyVariable(EmitContext&) {} -inline void EmitGetLoopSafetyVariable(EmitContext&) {} - #define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) // Microinstruction emitters diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 961c9180c..527685fb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -12,6 +12,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/shader_info.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a3c412a0f..d46be1638 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -4,59 +4,10 @@ #pragma once -#include -#include -#include - #include "common/common_types.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; - -enum class TessPrimitive { - Isolines, - Triangles, - Quads, -}; - -enum class TessSpacing { - Equal, - FractionalOdd, - FractionalEven, -}; - -struct TransformFeedbackVarying { - u32 buffer{}; - u32 stride{}; - u32 offset{}; - u32 components{}; -}; - struct Profile { u32 supported_spirv{0x00010000}; @@ -114,27 +65,4 @@ struct Profile { bool ignore_nan_fp_comparisons{}; }; -struct RuntimeInfo { - std::array generic_input_types{}; - bool convert_depth_mode{}; - bool force_early_z{}; - - TessPrimitive tess_primitive{}; - TessSpacing tess_spacing{}; - bool tess_clockwise{}; - - InputTopology input_topology{}; - - std::optional 
fixed_state_point_size; - std::optional alpha_test_func; - float alpha_test_reference{}; - - // Static y negate value - bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM - bool glasm_use_storage_buffers{}; - - std::vector xfb_varyings; -}; - } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..d4b047b4d --- /dev/null +++ b/src/shader_recompiler/runtime_info.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Shader { + +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, + Disabled, +}; + +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + +struct RuntimeInfo { + std::array generic_input_types{}; + bool convert_depth_mode{}; + bool force_early_z{}; + + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + + InputTopology input_topology{}; + + std::optional fixed_state_point_size; + std::optional alpha_test_func; + float alpha_test_reference{}; + + // Static y negate value + bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; + + std::vector xfb_varyings; +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6952a1f2c..ff5707119 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -16,6 +16,7 @@ #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index 6832c6db1..8f6946d65 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/engines/maxwell_3d.h" namespace VideoCommon { From 892b8aa2adbadf1935aa5e9d87abbc686702cb2c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:48:05 -0300 Subject: [PATCH 685/770] glsl: Only declare fragment outputs on fragment shaders --- src/shader_recompiler/backend/glsl/emit_context.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 93057ebb9..bd40356a1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -340,11 +340,13 @@ 
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); } - for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { - if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { - continue; + if (stage == Stage::Fragment) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { + continue; + } + header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } - header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: [PATCH 686/770] shader: Properly manage attributes not written from previous stages --- .../backend/glsl/emit_context.cpp | 26 ++++++------------- .../backend/glsl/emit_context.h | 2 -- .../glsl/emit_glsl_context_get_set.cpp | 5 ++++ .../backend/glsl/emit_glsl_special.cpp | 18 ++++++------- .../backend/spirv/emit_context.cpp | 3 +++ .../spirv/emit_spirv_context_get_set.cpp | 2 +- .../frontend/maxwell/translate_program.cpp | 4 ++- .../ir_opt/collect_shader_info_pass.cpp | 6 +++-- src/shader_recompiler/runtime_info.h | 8 ++++-- src/shader_recompiler/shader_info.h | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 11 +++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +++++++++--- 12 files changed, 62 insertions(+), 41 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index bd40356a1..14c009535 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -327,11 +327,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; - if (generic.used) { - header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, - InputArrayDecorator(stage)); + if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + continue; } + header += + fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,10 +350,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } } for (size_t index = 0; index < info.stores_generics.size(); ++index) { - // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || StageInitializesVaryings()) { - DefineGenericOutput(index, program.invocations); + if (!info.stores_generics[index]) { + continue; } + DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -362,17 +363,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } -bool EmitContext::StageInitializesVaryings() const noexcept { - switch (stage) { - case Stage::VertexA: - case 
Stage::VertexB: - case Stage::Geometry: - return true; - default: - return false; - } -} - void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 4a50556e1..8fa87c02c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,8 +136,6 @@ public: code += '\n'; } - [[nodiscard]] bool StageInitializesVaryings() const noexcept; - std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index a241d18fe..663ff3753 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { @@ -179,6 +180,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + ctx.AddF32("{}=0.f;", inst, attr); + return; + } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index f8e8aaa67..1a2d3dcea 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -12,11 +12,12 @@ namespace Shader::Backend::GLSL { namespace { -void InitializeVaryings(EmitContext& ctx) { - ctx.Add("gl_Position=vec4(0,0,0,1);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < 16; ++index) { + if (ctx.info.stores_generics[index]) { ctx.Add("out_attr{}=vec4(0,0,0,1);", index); } } @@ -56,9 +57,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.StageInitializesVaryings()) { - InitializeVaryings(ctx); - } + InitializeOutputVaryings(ctx); + if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { for (size_t index = 0; index < ctx.info.stores_frag_color.size(); ++index) { if (ctx.info.stores_frag_color[index]) { @@ -73,7 +73,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); - InitializeVaryings(ctx); + InitializeOutputVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 007b79650..612d087ad 100644 --- 
a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1209,6 +1209,9 @@ void EmitContext::DefineInputs(const Info& info) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { + if (!runtime_info.previous_stage_stores_generic[index]) { + continue; + } const InputVarying generic{info.input_generics[index]}; if (!generic.used) { continue; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 42fff74e3..4ac1fbae5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -286,7 +286,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type) { + if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c1..ed8729fca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 47933df97..bab32b58b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,7 +79,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { void SetAttribute(Info& info, IR::Attribute attr) { if (IR::IsGeneric(attr)) { - info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; + info.stores_generics[IR::GenericAttributeIndex(attr)] = true; return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { @@ -956,7 +956,9 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } if (info.stores_indexed_attributes) { for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); + if (header.vtg.IsOutputGenericVectorActive(i)) { + info.stores_generics[i] = true; + } } info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; info.stores_position |= header.vtg.omap_systemb.position != 0; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index d4b047b4d..63fe2afaf 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -59,6 +60,8 @@ struct 
TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; + std::bitset<32> previous_stage_stores_generic{}; + bool convert_depth_mode{}; bool force_early_z{}; @@ -72,11 +75,12 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; - // Static y negate value + /// Static Y negate value bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM + /// Use storage buffers instead of global pointers on GLASM bool glasm_use_storage_buffers{}; + /// Transform feedback state for each varying std::vector xfb_varyings; }; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e9ebc16a4..a20e15d2e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -140,7 +140,7 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::array stores_generics{}; + std::bitset<32> stores_generics{}; bool stores_layer{}; bool stores_viewport_index{}; bool stores_point_size{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b459397f5..b8b24dd3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -58,8 +58,15 @@ auto MakeSpan(Container& container) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } switch (program.stage) { case Shader::Stage::VertexB: case Shader::Stage::Geometry: @@ -400,6 +407,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { @@ -413,12 +421,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{ - MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } + previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f4207..dc028306a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; From 1091995f8e5ba79d659ab39fe4dbbca26ad01488 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 05:02:19 -0300 Subject: [PATCH 687/770] shader: Simplify MergeDualVertexPrograms --- .../frontend/maxwell/translate_program.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ed8729fca..e728b43cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -174,14 +174,12 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); result.blocks = GenerateBlocks(result.syntax_list); result.post_order_blocks = vertex_b.post_order_blocks; for (const auto& block : vertex_a.post_order_blocks) { From ccbd24fe00734a8a1d146b4a209a543dc9c3f41e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 16 Jun 2021 19:59:08 -0400 Subject: [PATCH 688/770] glsl: Fix cbuf component indexing bug fallback --- .../backend/glsl/emit_glsl_context_get_set.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 663ff3753..30c4cff81 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -37,8 +37,9 @@ std::string_view OutputVertexIndex(EmitContext& ctx) { void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const IR::Value& offset, u32 num_bits, std::string_view cast = {}, - bool component_indexing_bug = false, std::string_view bit_offset = {}) { + std::string_view bit_offset = {}) { const bool is_immediate{offset.IsImmediate()}; + const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; if (is_immediate) { const s32 signed_offset{static_cast(offset.U32())}; static constexpr u32 cbuf_size{0x10000}; @@ -77,12 +78,11 @@ void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (offset.IsImmediate()) { const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; - GetCbuf(ctx, ret, binding, offset, 8, cast, false, bit_offset); + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; - GetCbuf(ctx, ret, binding, offset, 8, cast, ctx.profile.has_gl_component_indexing_bug, - bit_offset); + GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); } } @@ -91,12 +91,11 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const const auto ret{ctx.var_alloc.Define(inst,
GlslVarType::U32)}; if (offset.IsImmediate()) { const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; - GetCbuf(ctx, ret, binding, offset, 16, cast, false, bit_offset); + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); } else { const auto offset_var{ctx.var_alloc.Consume(offset)}; const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; - GetCbuf(ctx, ret, binding, offset, 16, cast, ctx.profile.has_gl_component_indexing_bug, - bit_offset); + GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); } } From ca67077ca87772b4b4ac61d08f5b2c60616348e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 21:14:57 -0300 Subject: [PATCH 689/770] vk_graphics_pipeline: Use VK_KHR_push_descriptor when available ~51% faster on Nvidia compared to previous method. --- .../renderer_vulkan/pipeline_helper.h | 32 +++++++++++++------ .../renderer_vulkan/vk_compute_pipeline.cpp | 8 ++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 ++++++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../vulkan_common/vulkan_device.cpp | 11 +++++++ src/video_core/vulkan_common/vulkan_device.h | 12 +++++++ .../vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 31 +++++++++++------- 8 files changed, 88 insertions(+), 36 deletions(-) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index c6e5e059b..4847db6b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -16,38 +16,50 @@ #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { class DescriptorLayoutBuilder { public: - DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} - vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } - return device->CreateDescriptorSetLayout({ + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, - .flags = 0, + .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) const { + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { if (entries.empty()) { return nullptr; } - return device->CreateDescriptorUpdateTemplateKHR({ + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? 
VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, .descriptorUpdateEntryCount = static_cast(entries.size()), .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .templateType = type, .descriptorSetLayout = descriptor_set_layout, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .pipelineLayout = pipeline_layout, @@ -56,7 +68,7 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { - return device->CreatePipelineLayout({ + return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -97,14 +109,16 @@ private: .stride = sizeof(DescriptorUpdateEntry), }); ++binding; + num_descriptors += descriptors[i].count; offset += sizeof(DescriptorUpdateEntry); } } - const vk::Device* device{}; + const Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; + u32 num_descriptors{}; size_t offset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cc855a62e..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript uniform_buffer_sizes.begin()); auto func{[this, &descriptor_pool, shader_notify] { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = - builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e02b1b7ab..2b59a9d88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; constexpr size_t MAX_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array 
stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); - descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { if (!descriptor_set_layout) { return; } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 40d1edabd..622267147 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -148,6 +148,7 @@ private: std::condition_variable build_condvar; std::mutex build_mutex; std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0d8c6cd08..9d918de8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -767,6 +767,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -932,6 +933,16 @@ std::vector Device::LoadExtensions(bool requires_surface) { khr_workgroup_memory_explicit_layout = true; } } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = push_descriptor.maxPushDescriptors; + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4fda472b0..49605752d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -154,6 +154,11 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; @@ -194,6 +199,11 @@ public: return khr_spirv_1_4; } + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -330,6 +340,7 @@ private: VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors. bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. @@ -345,6 +356,7 @@ private: bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
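The descriptor path split above is the core of this patch: with VK_KHR_push_descriptor, the per-draw allocate/update/bind round trip collapses into a single command that writes the descriptor payload straight into the command buffer. A minimal sketch of the two paths against the raw Vulkan API, assuming the update template was created with the matching templateType and that push_fn was loaded through vkGetDeviceProcAddr (extension entry points are not exported statically):

#include <vulkan/vulkan.h>

// Sketch only: fallback_set must come from a descriptor pool, and the template
// type must be PUSH_DESCRIPTORS_KHR or DESCRIPTOR_SET to match the branch taken.
void BindDrawDescriptors(VkDevice device, VkCommandBuffer cmdbuf, VkPipelineLayout layout,
                         VkDescriptorUpdateTemplate update_template,
                         VkDescriptorSet fallback_set, const void* descriptor_data,
                         bool use_push_descriptor,
                         PFN_vkCmdPushDescriptorSetWithTemplateKHR push_fn) {
    if (use_push_descriptor) {
        // Fast path: no descriptor set is allocated or updated; the payload is
        // recorded directly into the command buffer.
        push_fn(cmdbuf, update_template, layout, 0, descriptor_data);
    } else {
        // Fallback: update a pre-allocated set, then bind it.
        vkUpdateDescriptorSetWithTemplate(device, fallback_set, update_template,
                                          descriptor_data);
        vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, 1,
                                &fallback_set, 0, nullptr);
    }
}

Skipping the per-draw pool allocation and set update is where the quoted ~51% Nvidia speedup comes from, and it is why the graphics pipeline below only keeps a descriptor_allocator when the push path is unavailable.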
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 7e13ae8af..d7e9fac22 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 6e5be1186..d43b606f1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + 
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -990,6 +991,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } From b5e78607ad138ad6d915b1f754fa1e5bef9c1c3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 22:00:31 -0300 Subject: [PATCH 690/770] spirv: Handle small storage buffer loads on devices with no support --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++-- src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 612d087ad..32c21f3b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -952,13 +952,13 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { const IR::Type used_types{profile.support_descriptor_aliasing ? 
info.used_storage_buffer_types : IR::Type::U32}; - if (True(used_types & IR::Type::U8)) { + if (profile.support_int8 && True(used_types & IR::Type::U8)) { DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, sizeof(u8)); DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, sizeof(u8)); } - if (True(used_types & IR::Type::U16)) { + if (profile.support_int16 && True(used_types & IR::Type::U16)) { DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, sizeof(u16)); DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 37a66095f..ccebf170d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -124,7 +124,7 @@ void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { return ctx.OpUConvert(ctx.U32[1], LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, sizeof(u8), &StorageDefinitions::U8)); @@ -135,7 +135,7 @@ Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value } Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { return ctx.OpSConvert(ctx.U32[1], LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, sizeof(s8), &StorageDefinitions::S8)); @@ -146,7 +146,7 @@ Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value } Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { return ctx.OpUConvert(ctx.U32[1], LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, sizeof(u16), &StorageDefinitions::U16)); @@ -157,7 +157,7 @@ Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Valu } Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { return ctx.OpSConvert(ctx.U32[1], LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, sizeof(s16), &StorageDefinitions::S16)); From 59fead3a47227b513c0ca35090919823f44faecf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 17 Jun 2021 20:34:58 -0300 Subject: [PATCH 691/770] spirv: Properly handle devices without int8 and int16 --- .../backend/spirv/emit_context.cpp | 58 +++++++++++-------- .../spirv/emit_spirv_context_get_set.cpp | 54 +++++++++++------ 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32c21f3b4..4c6501129 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -911,37 +911,45 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if 
(info.constant_buffer_descriptors.empty()) { return; } - if (profile.support_descriptor_aliasing) { - if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); - } - if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', - sizeof(u16)); - DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', - sizeof(s16)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', - sizeof(u32)); - } - if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', - sizeof(f32)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', - sizeof(u32[2])); - } - binding += static_cast(info.constant_buffer_descriptors.size()); - } else { + IR::Type types{info.used_constant_buffer_types}; + if (!profile.support_descriptor_aliasing) { DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', sizeof(u32[4])); for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; } } + if (True(types & IR::Type::U8)) { + if (profile.support_int8) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U16)) { + if (profile.support_int16) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } else { + types |= IR::Type::U32; + } + } + if (True(types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast(info.constant_buffer_descriptors.size()); } void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4ac1fbae5..2e364baec 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -144,6 +144,10 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, return ctx.OpLoad(result_type, access_chain); } +Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); +} + Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); } @@ -203,58 +207,74 @@ void 
EmitGetLoopSafetyVariable(EmitContext&) { } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); } else { const Id vector{GetCbufU32x4(ctx, binding, offset)}; - const Id element{GetCbufElement(ctx, vector, offset, 0u)}; - const Id bit_offset{ctx.BitOffset8(offset)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + element = GetCbufElement(ctx, vector, offset, 0u); } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); } Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); } else { const Id vector{GetCbufU32x4(ctx, binding, offset)}; - const Id element{GetCbufElement(ctx, vector, offset, 0u)}; - const Id bit_offset{ctx.BitOffset8(offset)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + element = GetCbufElement(ctx, vector, offset, 0u); } + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); } Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { const Id load{ GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; return ctx.OpUConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); } else { const Id vector{GetCbufU32x4(ctx, binding, offset)}; - const Id element{GetCbufElement(ctx, vector, offset, 0u)}; - const Id bit_offset{ctx.BitOffset16(offset)}; - return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + element = GetCbufElement(ctx, vector, offset, 0u); } + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); } Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (ctx.profile.support_descriptor_aliasing) { + if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { const Id load{ GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; return ctx.OpSConvert(ctx.U32[1], load); + } + Id element{}; + if (ctx.profile.support_descriptor_aliasing) { + element = GetCbufU32(ctx, binding, offset); } else { const Id vector{GetCbufU32x4(ctx, binding, offset)}; - const Id element{GetCbufElement(ctx, vector, offset, 0u)}; - const Id bit_offset{ctx.BitOffset16(offset)}; - return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + element = GetCbufElement(ctx, vector, offset, 0u); } + const Id 
bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); } Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (ctx.profile.support_descriptor_aliasing) { - return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); + return GetCbufU32(ctx, binding, offset); } else { const Id vector{GetCbufU32x4(ctx, binding, offset)}; return GetCbufElement(ctx, vector, offset, 0u); From 0cd08b3e72ed042ae0bf97c62fb6b54580b0dfc9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 17 Jun 2021 20:35:25 -0300 Subject: [PATCH 692/770] shader: Align constant buffer sizes to 16 bytes WAR for AMD reading zeroes on uniform buffers of size 2. --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index bab32b58b..a82472152 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/alignment.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" @@ -602,7 +603,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } u32& size{info.constant_buffer_used_sizes[index.U32()]}; if (offset.IsImmediate()) { - size = std::max(size, offset.U32() + element_size); + size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); } else { size = 0x10'000; } From a0365217f5b2ec783738af396ebd82f12cffc0b4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 21:44:12 -0400 Subject: [PATCH 693/770] texture_pass: Fix is_read image qualification Atomic operations are considered to have both read and write access. This was not being accounted for. 
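The one-line change works because image atomics both load and store the texel: deriving the flags by equality against a single opcode left atomics marked as write-only. With the fix, only a pure ImageWrite is not a read and only a pure ImageRead is not a write. A self-contained sketch of the corrected classification, using an abbreviated opcode enum rather than the recompiler's real one:

// Atomic image operations (e.g. ImageAtomicIAdd32) count as both read and write.
enum class Opcode { ImageRead, ImageWrite, ImageAtomicIAdd32 };

struct Access {
    bool is_read;
    bool is_written;
};

constexpr Access Classify(Opcode op) {
    return Access{
        .is_read = op != Opcode::ImageWrite,   // only a pure write never reads
        .is_written = op != Opcode::ImageRead, // only a pure read never writes
    };
}

static_assert(Classify(Opcode::ImageAtomicIAdd32).is_read &&
              Classify(Opcode::ImageAtomicIAdd32).is_written);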
--- src/shader_recompiler/ir_opt/texture_pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 737f186ab..44ad10d43 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -434,7 +434,7 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; - const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, From fcff19e0fa3d21130bc7b6cd50a10db102b5d4d7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 23:12:41 -0400 Subject: [PATCH 694/770] shaders: Allow shader notify when async shaders is disabled --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 ++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b8b24dd3d..8aaadccc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -430,10 +430,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique( - device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, + maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dc028306a..e83628c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -529,11 +529,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; - return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, - texture_cache, notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -596,9 +595,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, notify, program.info, + thread_worker, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { From 9e066dcb15b46a1b71068c4865f4a035aa0a43d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 18 Jun 2021 00:54:16 -0400 Subject: [PATCH 695/770] glsl: Fix output varying initialization when transform feedback is used --- .../backend/glsl/emit_glsl_special.cpp | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 1a2d3dcea..2a15fc29a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -12,13 +12,47 @@ namespace Shader::Backend::GLSL { namespace { +std::string_view OutputVertexIndex(EmitContext& ctx) { + return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; +} + void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < 16; ++index) { - if (ctx.info.stores_generics[index]) { - ctx.Add("out_attr{}=vec4(0,0,0,1);", index); + for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { + if (!ctx.info.stores_generics[index]) { + continue; + } + const auto& info_array{ctx.output_generics.at(index)}; + const auto output_decorator{OutputVertexIndex(ctx)}; + size_t element{}; + while (element < info_array.size()) { + const auto& info{info_array.at(element)}; + const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; + switch (info.num_components) { + case 1: { + const char value{element == 3 ? '1' : '0'}; + ctx.Add("{}={}.f;", varying_name, value); + break; + } + case 2: + case 3: + if (element + info.num_components < 4) { + ctx.Add("{}=vec{}(0);", varying_name, info.num_components); + } else { + // last element is the w component, must be initialized to 1 + const auto zeros{info.num_components == 3 ? 
"0,0," : "0,"}; + ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); + } + break; + case 4: + ctx.Add("{}=vec4(0,0,0,1);", varying_name); + break; + default: + break; + } + element += info.num_components; } } } From 838d7e4ca59b79dc9a8dd727a12dfba00e73242c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 18 Jun 2021 03:22:00 -0300 Subject: [PATCH 696/770] buffer_cache: Fix size reductions not having in mind bind sizes A buffer binding can change between shaders without changing the shaders. This lead to outdated bindings on OpenGL. --- src/video_core/buffer_cache/buffer_cache.h | 30 +++++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d004199ba..a75de4384 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -405,8 +405,6 @@ private: u32 written_compute_texture_buffers = 0; u32 image_compute_texture_buffers = 0; - std::array fast_bound_uniform_buffers{}; - std::array uniform_cache_hits{}; std::array uniform_cache_shots{}; @@ -416,6 +414,10 @@ private: std::conditional_t, Empty> dirty_uniform_buffers{}; + std::conditional_t, Empty> fast_bound_uniform_buffers{}; + std::conditional_t, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector cached_write_buffer_ids; @@ -684,6 +686,7 @@ void BufferCache
<P>
::SetUniformBuffersState(const std::array& m fast_bound_uniform_buffers.fill(0); } dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } enabled_uniform_buffer_masks = mask; @@ -1016,14 +1019,18 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size); + !buffer.IsRegionGpuModified(cpu_addr, size) && false; if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1033,6 +1040,7 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } if constexpr (IS_OPENGL) { fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -1046,9 +1054,13 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } const u32 offset = buffer.Offset(cpu_addr); @@ -1060,6 +1072,9 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -1725,6 +1740,7 @@ template void BufferCache
<P>
::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; From cfbc85839db5b92b0b205348f4ccdeb21570599c Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Fri, 18 Jun 2021 03:31:13 -0400 Subject: [PATCH 697/770] glsl: Add missing ; in EmitSetSampleMask Fixes shader compilation in Okami HD --- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 30c4cff81..3d2ba2eee 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -409,7 +409,7 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_vi } void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { - ctx.Add("gl_SampleMask[0]=int({})", value); + ctx.Add("gl_SampleMask[0]=int({});", value); } void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { From df9b7e18f5c5bab84cc8c38214a0e1e9e9506bd4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 17:28:46 -0300 Subject: [PATCH 698/770] buffer_cache: Fix debugging leftover --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a75de4384..cb9c69baf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1019,7 +1019,7 @@ void BufferCache
<P>
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size) && false; + !buffer.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { From 1d182fc0f5f8a6facf6e4aebcf79d6d9a092a48c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 21:30:27 -0300 Subject: [PATCH 699/770] shader: Calibrate loop safety threshold --- .../frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 605ec38e1..0fb870a69 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -751,7 +751,7 @@ private: IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - static constexpr u32 SAFETY_THRESHOLD = 0x2000; + static constexpr u32 SAFETY_THRESHOLD = 0x1000; ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { From 1c648f176c0f929266795181d1ba16d27008e5ff Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 19 Jun 2021 22:06:53 -0400 Subject: [PATCH 700/770] emit_glsl_special: Skip initialization of frag_color0 Fixes rendering in Devil May Cry without regressing Ori and the Blind Forest. --- src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 2a15fc29a..92aa1b168 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -94,7 +94,7 @@ void EmitPrologue(EmitContext& ctx) { InitializeOutputVaryings(ctx); if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { - for (size_t index = 0; index < ctx.info.stores_frag_color.size(); ++index) { + for (size_t index = 1; index < ctx.info.stores_frag_color.size(); ++index) { if (ctx.info.stores_frag_color[index]) { continue; } From 218dedca1f8572bc0e43f8e7ea577f4ece28c4c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:00:38 -0300 Subject: [PATCH 701/770] gl_graphics_pipeline: Port optimizations from Vulkan pipelines --- .../renderer_opengl/gl_graphics_pipeline.cpp | 180 ++++++++++++------ .../renderer_opengl/gl_graphics_pipeline.h | 18 +- 2 files changed, 141 insertions(+), 57 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,6 +15,12 @@ #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; @@ -98,13 +104,76 @@ std::pair TransformFeedbackEnum(u8 location) { return {GL_POSITION, 0}; } -struct Spec { +template +bool Passes(const 
std::array& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(stage_infos, enabled_mask)) { + return FindSpec(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { static constexpr std::array enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; static constexpr bool has_texture_buffers = true; static constexpr bool has_image_buffers = true; static constexpr bool has_images = true; }; + +ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 enabled_mask) { + return FindSpec(infos, enabled_mask); +} } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; } else { AttachShader(Stage(stage), program.handle, code); } @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (!device.UseAssemblyShaders()) { LinkProgram(program.handle); } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += - AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += - AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } if (shader_notify) { shader_notify->MarkShaderComplete(); } @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -void GraphicsPipeline::Configure(bool is_indexed) { +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { } } +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + void 
GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -75,7 +75,9 @@ public: const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); - void Configure(bool is_indexed); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } void ConfigureTransformFeedback() const { if (num_xfb_attribs != 0) { @@ -91,11 +93,21 @@ public: return is_built.load(std::memory_order::relaxed); } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + template + void ConfigureImpl(bool is_indexed); void ConfigureTransformFeedbackImpl() const; + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -103,6 +115,8 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + void (*configure_func)(GraphicsPipeline*, bool){}; + OGLProgram program; std::array assembly_programs; u32 enabled_stages_mask{}; From f5db8c74405c93b52efbdef318790bd9ec4661c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:23:50 -0300 Subject: [PATCH 702/770] gl_shader_cache: Check previous pipeline before checking hash map Port optimization from Vulkan. 
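The optimization being ported is a one-entry memo in front of the pipeline hash map: most draws reuse the previous draw's pipeline, so comparing the freshly built key against the last pipeline's key skips both the CityHash and the unordered_map probe. A reduced sketch of the pattern, where Key, Pipeline, its Key() accessor, and the Create factory are stand-ins rather than the real types:

#include <memory>
#include <unordered_map>

template <typename Key, typename Pipeline, typename Hash>
class LastUsedCache {
public:
    Pipeline* Get(const Key& key) {
        // Fast path: one comparison against the previous draw's key.
        if (current && key == current->Key()) {
            return current;
        }
        return GetSlowPath(key);
    }

private:
    Pipeline* GetSlowPath(const Key& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = Pipeline::Create(key); // assumed factory
        }
        current = it->second.get();
        return current;
    }

    Pipeline* current{};
    std::unordered_map<Key, std::unique_ptr<Pipeline>, Hash> cache;
};

The diff below also clears current_pipeline whenever RefreshStages fails, so a stale fast-path hit cannot survive a shader change.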
--- .../renderer_opengl/gl_graphics_pipeline.cpp | 33 +++++++++---------- .../renderer_opengl/gl_graphics_pipeline.h | 9 +++-- .../renderer_opengl/gl_shader_cache.cpp | 24 +++++++++----- .../renderer_opengl/gl_shader_cache.h | 5 ++- .../renderer_vulkan/vk_pipeline_cache.h | 3 +- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 6b62fa1da..92974ba08 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -184,17 +184,15 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, - BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - ShaderWorker* thread_worker, - VideoCore::ShaderNotify* shader_notify, - std::array sources, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, std::array sources, + const std::array& infos, const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { if (shader_notify) { shader_notify->MarkShaderBuilding(); } @@ -241,10 +239,10 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && key.xfb_enabled) { + GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); } @@ -505,15 +503,14 @@ void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); } -void GraphicsPipeline::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { +void GraphicsPipeline::GenerateTransformFeedbackState() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. 
GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = xfb_state.layouts[feedback]; + const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -528,7 +525,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = xfb_state.varyings[feedback]; + const auto& locations = key.xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a3546daa8..a033d4a95 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -73,7 +73,7 @@ public: ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); + const GraphicsPipelineKey& key_); void Configure(bool is_indexed) { configure_func(this, is_indexed); @@ -85,6 +85,10 @@ public: } } + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { return writes_global_memory; } @@ -106,7 +110,7 @@ private: void ConfigureTransformFeedbackImpl() const; - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + void GenerateTransformFeedbackState(); TextureCache& texture_cache; BufferCache& buffer_cache; @@ -114,6 +118,7 @@ private: Tegra::Engines::Maxwell3D& maxwell3d; ProgramManager& program_manager; StateTracker& state_tracker; + const GraphicsPipelineKey key; void (*configure_func)(GraphicsPipeline*, bool){}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8aaadccc4..c36b0d8cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -298,6 +298,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; return nullptr; } const auto& regs{maxwell3d.regs}; @@ -313,15 +314,23 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (graphics_key.xfb_enabled) { SetXfbState(graphics_key.xfb_state, regs); } - const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& program{pair->second}; - if (is_new) { - program = CreateGraphicsPipeline(); + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return current_pipeline->IsBuilt() ? 
current_pipeline : nullptr; } - if (!program || !program->IsBuilt()) { + return CurrentGraphicsPipelineSlowPath(); +} + +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + current_pipeline = pipeline.get(); + if (!pipeline || !pipeline->IsBuilt()) { return nullptr; } - return program.get(); + return pipeline.get(); } ComputePipeline* ShaderCache::CurrentComputePipeline() { @@ -432,8 +441,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, &shader_notify, sources, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ff5707119..16873fcec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -53,6 +53,8 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -75,9 +77,10 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; GraphicsPipelineKey graphics_key{}; - const bool use_asynchronous_shaders; + GraphicsPipeline* current_pipeline{}; ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 42da2960b..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -146,12 +146,11 @@ private: BufferCache& buffer_cache; TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - bool use_asynchronous_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; From 9bd05313849f76fc64406d5ebf3aadf39fa3bfde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:35:30 -0300 Subject: [PATCH 703/770] gl_graphics_pipeline: Inline hash and operator== key functions --- .../renderer_opengl/gl_graphics_pipeline.cpp | 10 ---------- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 92974ba08..ad61a17a5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. 
#include -#include -#include "common/cityhash.h" #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -176,14 +174,6 @@ ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 ena } } // Anonymous namespace -size_t GraphicsPipelineKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - GraphicsPipeline::GraphicsPipeline( const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a033d4a95..f82d712f8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -5,10 +5,12 @@ #pragma once #include +#include #include #include #include "common/bit_field.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" @@ -44,9 +46,13 @@ struct GraphicsPipelineKey { std::array padding; VideoCommon::TransformFeedbackState xfb_state; - size_t Hash() const noexcept; + size_t Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); + } - bool operator==(const GraphicsPipelineKey&) const noexcept; + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { return !operator==(rhs); From 3a2dd1b48310e2912e7f7f90da15bff555ef7256 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:40:21 -0400 Subject: [PATCH 704/770] glasm: Implement SetAttribute ViewportMask --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 ++- .../backend/glasm/emit_glasm_context_get_set.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 66e4aea04..f39b02f77 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -286,7 +286,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if ((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) { + if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || + info.stores_viewport_mask) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 20b925877..bc195d248 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -157,6 +157,14 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, "Viewport stored outside of geometry shader not supported by device"); } break; + case IR::Attribute::ViewportMask: + // NV_viewport_array2 is required to access 
result.viewportmask, regardless of shader stage. + if (ctx.profile.support_viewport_index_layer_non_geometry) { + ctx.Add("MOV.F result.viewportmask[0].x,{};", value); + } else { + LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); + } + break; case IR::Attribute::PointSize: ctx.Add("MOV.F result.pointsize.x,{};", value); break; From cbce9ddd4a6d948c6d08cbe5f4bb7e228eb4c85e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:47:56 -0400 Subject: [PATCH 705/770] glsl: Remove frag color initialization --- src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 92aa1b168..6420aaa21 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -92,15 +92,6 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& void EmitPrologue(EmitContext& ctx) { InitializeOutputVaryings(ctx); - - if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { - for (size_t index = 1; index < ctx.info.stores_frag_color.size(); ++index) { - if (ctx.info.stores_frag_color[index]) { - continue; - } - ctx.Add("frag_color{}=vec4(0,0,0,1);", index); - } - } } void EmitEpilogue(EmitContext&) {} From 3877918e9657bcde160080aecc1821cf8cb50ea4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 17:09:50 -0300 Subject: [PATCH 706/770] gl_graphics_pipeline: Fix assembly shaders check for transform feedbacks --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ad61a17a5..a93b03cf7 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -229,7 +229,7 @@ GraphicsPipeline::GraphicsPipeline( writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && key.xfb_enabled) { + if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { From 808ef97a086e7cc58a3ceded1de516ad6a6be5d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 21 Jun 2021 01:07:10 -0300 Subject: [PATCH 707/770] shader: Move loop safety tests to code emission --- .../backend/glasm/emit_context.h | 1 + .../backend/glasm/emit_glasm.cpp | 17 +++++++++ .../backend/glasm/emit_glasm_instructions.h | 2 - .../glasm/emit_glasm_not_implemented.cpp | 8 ---- .../backend/glsl/emit_context.h | 2 + .../backend/glsl/emit_glsl.cpp | 11 +++++- .../backend/glsl/emit_glsl_instructions.h | 2 - .../glsl/emit_glsl_not_implemented.cpp | 8 ---- .../backend/spirv/emit_spirv.cpp | 19 +++++++++- .../spirv/emit_spirv_context_get_set.cpp | 8 ---- .../backend/spirv/emit_spirv_instructions.h | 2 - .../frontend/ir/ir_emitter.cpp | 8 ---- .../frontend/ir/ir_emitter.h | 3 -- src/shader_recompiler/frontend/ir/opcodes.inc | 2 - .../maxwell/structured_control_flow.cpp | 37 ++----------------- .../ir_opt/ssa_rewrite_pass.cpp | 32 +--------------- 16 files changed, 54 insertions(+), 108 deletions(-) diff --git 
a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index cd4213cb7..9f86e55d3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -71,6 +71,7 @@ public: std::string_view stage_name = "invalid"; std::string_view attrib_name = "invalid"; + u32 num_safety_loop_vars{}; bool uses_y_direction{}; }; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index f39b02f77..79314f130 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -6,6 +6,8 @@ #include #include +#include "common/div_ceil.h" +#include "common/settings.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -222,6 +224,14 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("REP;"); break; case IR::AbstractSyntaxNode::Type::Repeat: + if (!Settings::values.disable_shader_loop_safety_checks) { + const u32 loop_index{ctx.num_safety_loop_vars++}; + const u32 vector_index{loop_index / 4}; + const char component{"xyzw"[loop_index % 4]}; + ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" + "BRK(LT.{});", + vector_index, component, vector_index, component, component); + } if (node.data.repeat.cond.IsImmediate()) { if (node.data.repeat.cond.U1()) { ctx.Add("ENDREP;"); @@ -425,6 +435,10 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I if (program.info.uses_fswzadd) { header += "FSWZA[4],FSWZB[4],"; } + const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("loop{},", index); + } header += "RC;" "LONG TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { @@ -441,6 +455,9 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I "MOV.F FSWZB[2],1;" "MOV.F FSWZB[3],-1;"; } + for (u32 index = 0; index < num_safety_loop_vectors; ++index) { + header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); + } if (ctx.uses_y_direction) { header += "PARAM y_direction[1]={state.material.front.ambient};"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index fef9ff9be..c9f4826ce 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -42,8 +42,6 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitSetLoopSafetyVariable(EmitContext& ctx); -void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index a487a0744..ff64c6924 100644 --- 
a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -153,14 +153,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitSetLoopSafetyVariable(EmitContext& ctx) { - NotImplemented(); -} - -void EmitGetLoopSafetyVariable(EmitContext& ctx) { - NotImplemented(); -} - void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..ecdf6e5bc 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -153,6 +153,8 @@ public: std::vector images; std::array, 32> output_generics{}; + u32 num_safety_loop_vars{}; + bool uses_y_direction{}; bool uses_cc_carry{}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index ff869923f..32c4f1da2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -6,6 +6,7 @@ #include #include "common/alignment.h" +#include "common/settings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" @@ -156,7 +157,12 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + if (Settings::values.disable_shader_loop_safety_checks) { + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); + } else { + ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, + ctx.var_alloc.Consume(node.data.repeat.cond)); + } break; default: throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); @@ -198,6 +204,9 @@ void DefineVariables(const EmitContext& ctx, std::string& header) { ctx.var_alloc.Representation(index, type), type_name); } } + for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { + header += fmt::format("int loop{}=0x2000;", i); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index df28036e4..6a30785bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -44,8 +44,6 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitSetLoopSafetyVariable(EmitContext& ctx); -void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 0a28a1ffc..f420fe388 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -46,14 +46,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitSetLoopSafetyVariable(EmitContext& ctx) { - 
NotImplemented(); -} - -void EmitGetLoopSafetyVariable(EmitContext& ctx) { - NotImplemented(); -} - void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index fd59b4d0a..278c262f8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -8,6 +8,7 @@ #include #include +#include "common/settings.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -151,9 +152,25 @@ void Traverse(EmitContext& ctx, IR::Program& program) { } break; case IR::AbstractSyntaxNode::Type::Repeat: { + Id cond{ctx.Def(node.data.repeat.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; + const Id safety_counter{ctx.AddGlobalVariable( + pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(safety_counter); + } + const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; + const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; + ctx.OpStore(safety_counter, new_counter); + + const Id safety_cond{ + ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; + cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); + } const Id loop_header_label{node.data.repeat.loop_header->Definition()}; const Id merge_label{node.data.repeat.merge->Definition()}; - ctx.OpBranchConditional(ctx.Def(node.data.repeat.cond), loop_header_label, merge_label); + ctx.OpBranchConditional(cond, loop_header_label, merge_label); break; } case IR::AbstractSyntaxNode::Type::Return: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2e364baec..85bd72389 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -198,14 +198,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSetLoopSafetyVariable(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - -void EmitGetLoopSafetyVariable(EmitContext&) { - throw LogicError("Unreachable instruction"); -} - Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e3e5b03fe..1181e7b4f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,8 +43,6 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitSetLoopSafetyVariable(EmitContext& ctx); -void EmitGetLoopSafetyVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, 
const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d2ac2acac..2e75208e6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -147,14 +147,6 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -U32 IREmitter::GetLoopSafetyVariable(u32 id) { - return Inst(Opcode::GetLoopSafetyVariable, id); -} - -void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { - Inst(Opcode::SetLoopSafetyVariable, id, counter); -} - U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { return Inst(Opcode::GetCbufU32, binding, byte_offset); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7caab1f61..bb3500c54 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,9 +55,6 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); - [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); - void SetLoopSafetyVariable(u32 id, const U32& counter); - [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e87aeddd5..8a8d0d759 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,8 +32,6 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) -OPCODE(GetLoopSafetyVariable, U32, U32, ) -OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 0fb870a69..10d05dc4c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -15,7 +15,6 @@ #include -#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -663,7 +662,7 @@ public: Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); + IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); } @@ -741,27 +740,8 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - const u32 this_loop_id{loop_id++}; - - if (Settings::values.disable_shader_loop_safety_checks) { - if (current_block) { - current_block->AddBranch(loop_header_block); - } - } else { - IR::Block* const init_block{block_pool.Create(inst_pool)}; - IR::IREmitter ir{*init_block}; - - static constexpr u32 SAFETY_THRESHOLD = 0x1000; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); - - if (current_block) { - current_block->AddBranch(init_block); - } - 
init_block->AddBranch(loop_header_block); - - auto& init_node{syntax_list.emplace_back()}; - init_node.type = IR::AbstractSyntaxNode::Type::Block; - init_node.data.block = init_block; + if (current_block) { + current_block->AddBranch(loop_header_block); } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -779,16 +759,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - if (!Settings::values.disable_shader_loop_safety_checks) { - const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; - const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; - ir.SetLoopSafetyVariable(this_loop_id, new_counter); - - const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; - cond = ir.LogicalAnd(cond, safety_cond); - } - cond = ir.ConditionRef(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index fff25c4a2..dcaced83f 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,22 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; -struct LoopSafetyVariable { - LoopSafetyVariable() = default; - explicit LoopSafetyVariable(u32 index_) : index{index_} {} - - auto operator<=>(const LoopSafetyVariable&) const noexcept = default; - - u32 index; -}; - struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = - std::variant; +using Variant = std::variant; using ValueMap = boost::container::flat_map; struct DefTable { @@ -88,13 +78,6 @@ struct DefTable { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { - return loop_safety_vars[variable.index][block]; - } - void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { - loop_safety_vars[variable.index].insert_or_assign(block, value); - } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } @@ -132,7 +115,6 @@ struct DefTable { std::array preds; boost::container::flat_map goto_vars; - boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -152,10 +134,6 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } -IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { - return IR::Opcode::UndefU32; -} - IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -337,9 +315,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; - case IR::Opcode::SetLoopSafetyVariable: - pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); - break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -368,9 +343,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; - case 
IR::Opcode::GetLoopSafetyVariable: - inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); - break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; From a7536825dfd3a424ff709995653da4da0ce6dea6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 21 Jun 2021 21:07:52 -0400 Subject: [PATCH 708/770] shader_recompiler: Fix IADD3 input partitioning --- .../impl/integer_add_three_input.cpp | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 259a6e6ac..33e2a51ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -42,14 +42,10 @@ enum class Half : u64 { } } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_a; - BitField<31, 2, Half> half_c; - BitField<33, 2, Half> half_b; - BitField<35, 2, Half> half_a; BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; @@ -58,11 +54,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - IR::U32 op_a{v.X(iadd3.src_a)}; - op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); - op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); - op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); - if (iadd3.neg_a != 0) { op_a = v.ir.INeg(op_a); } @@ -72,7 +63,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { if (iadd3.neg_c != 0) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; @@ -97,15 +87,24 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } // Anonymous namespace void TranslatorVisitor::IADD3_reg(u64 insn) { - IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); + union { + u64 insn; + BitField<35, 2, Half> half_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + } iadd3{insn}; + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c); } void TranslatorVisitor::IADD3_cbuf(u64 insn) { - IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); } void TranslatorVisitor::IADD3_imm(u64 insn) { - IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); } } // namespace Shader::Maxwell From bc6e399ae3a9279b619f75a4b6fa4588934e9e78 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 21 Jun 2021 21:22:12 -0400 Subject: [PATCH 709/770] glsl: Fix IADD CC --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_integer.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp 
b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..5867a04ab 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -227,7 +227,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += "void main(){\n"; DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { - ctx.header += "uint carry;"; + ctx.header += "uint carry;uint iadd_op_b;"; } if (program.info.uses_subgroup_shuffles) { ctx.header += "bool shfl_in_bounds;"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 983e6d95d..fcc9afd85 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -33,8 +33,8 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { ctx.uses_cc_carry = true; - ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); - ctx.AddU1("{}=carry!=0;", *carry, result); + ctx.Add("iadd_op_b={};{}=uaddCarry({},{},carry);", b, result, a, b); + ctx.AddU1("{}=carry!=0;", *carry); carry->Invalidate(); } else { @@ -44,8 +44,10 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; - ctx.AddU1("{}=int({})>=0?int({})>int({}-{}):int({})<int({}-{});", *overflow, a, b, - s32_max, a, b, s32_max, a); + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto op_b{ctx.uses_cc_carry ? "iadd_op_b" : b}; + ctx.AddU1("{}=int({})>=0?int({})>int({}):int({})<int({});", *overflow, a, op_b, sub_a, + op_b, sub_a); overflow->Invalidate(); } } From 4397053d5c848deae00d6599f91b1e5c137a9639 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 22 Jun 2021 18:28:21 -0300 Subject: [PATCH 710/770] shader: Remove IAbs64 --- .../backend/glasm/emit_glasm_instructions.h | 1 - .../backend/glasm/emit_glasm_integer.cpp | 4 ---- .../backend/glsl/emit_glsl_instructions.h | 1 - .../backend/glsl/emit_glsl_integer.cpp | 4 ---- .../backend/spirv/emit_spirv_instructions.h | 1 - .../backend/spirv/emit_spirv_integer.cpp | 4 ---- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 11 ++--------- src/shader_recompiler/frontend/ir/ir_emitter.h | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - 9 files changed, 3 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c9f4826ce..12afda43b 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -306,7 +306,6 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, Register value); void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); diff --git
a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index cea45a3e0..587bbfe06 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -82,10 +82,6 @@ void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { ctx.Add("ABS.S {},{};", inst, value); } -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, Register value) { - ctx.LongAdd("MOV.S64 {},|{}|;", inst, value); -} - void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 6a30785bb..5936d086f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -365,7 +365,6 @@ void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view shift); void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index fcc9afd85..40f453593 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -80,10 +80,6 @@ void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { ctx.AddU32("{}=abs(int({}));", inst, value); } -void EmitIAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=abs(int64_t({}));", inst, value); -} - void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, std::string_view shift) { ctx.AddU32("{}={}<<{};", inst, base, shift); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1181e7b4f..f99c02848 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -284,7 +284,6 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitIAbs64(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 06ab23b1d..3501d7495 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -84,10 +84,6 @@ Id EmitIAbs32(EmitContext& ctx, Id value) { return ctx.OpSAbs(ctx.U32[1], value); } -Id EmitIAbs64(EmitContext& ctx, Id value) { - return ctx.OpSAbs(ctx.U64, value); -} - Id 
EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 2e75208e6..13159a68d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1152,15 +1152,8 @@ U32U64 IREmitter::INeg(const U32U64& value) { } } -U32U64 IREmitter::IAbs(const U32U64& value) { - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::IAbs32, value); - case Type::U64: - return Inst(Opcode::IAbs64, value); - default: - ThrowInvalidType(value.Type()); - } +U32 IREmitter::IAbs(const U32& value) { + return Inst(Opcode::IAbs32, value); } U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..53f7b3b06 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -208,7 +208,7 @@ public: [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); - [[nodiscard]] U32U64 IAbs(const U32U64& value); + [[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..9af750283 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -289,7 +289,6 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) -OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) From 27ca8a0e13deeebb4185ec22619d2b78b5ad8b21 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 22 Jun 2021 23:09:22 -0400 Subject: [PATCH 711/770] glsl: Better IAdd Overflow CC fix This ensures the original operand values are not overwritten when being used in the overflow detection. 
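
A note on the hazard (illustrative, not part of the patch itself): GLSL register allocation may alias the result variable with one of the source operands, so an overflow check emitted after the addition can read an already-clobbered value. Below is a minimal standalone C++ sketch of the same ordering rule; the AddOverflows helper is an assumption made up for this example:

    #include <cstdint>
    #include <cstdio>

    // Signed-overflow test evaluated on the *original* operands, written with
    // unsigned arithmetic so the wraparound is well defined in C++.
    static bool AddOverflows(uint32_t a, uint32_t b) {
        const uint32_t sum = a + b; // wraps modulo 2^32
        // Overflow occurred iff a and b share a sign bit that differs from the sum's.
        return ((~(a ^ b) & (a ^ sum)) >> 31) != 0;
    }

    int main() {
        uint32_t a = 0x7fffffffu; // INT32_MAX
        const uint32_t b = 1u;
        const bool overflow = AddOverflows(a, b); // computed before the add
        a += b;                                   // destination aliases operand a
        std::printf("sum=%u overflow=%d\n", a, overflow); // prints overflow=1
    }

Evaluating the check after the a += b statement would test the clobbered operand and misreport the flag, which is why the CC computation is hoisted above the addition here.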
--- .../backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_integer.cpp | 22 ++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 5867a04ab..32c4f1da2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -227,7 +227,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += "void main(){\n"; DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { - ctx.header += "uint carry;uint iadd_op_b;"; + ctx.header += "uint carry;"; } if (program.info.uses_subgroup_shuffles) { ctx.header += "bool shfl_in_bounds;"; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 40f453593..2892074e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -30,10 +30,21 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { } // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { + // Compute the overflow CC first as it requires the original operand values, + // which may be overwritten by the result of the addition + if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; + const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; + const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; + const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; + ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); + overflow->Invalidate(); + } const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { ctx.uses_cc_carry = true; - ctx.Add("iadd_op_b={};{}=uaddCarry({},{},carry);", b, result, a, b); + ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); ctx.AddU1("{}=carry!=0;", *carry); carry->Invalidate(); } else { @@ -41,15 +52,6 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin } SetZeroFlag(ctx, inst, result); SetSignFlag(ctx, inst, result); - if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { - // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c - constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; - const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; - const auto op_b{ctx.uses_cc_carry ? "iadd_op_b" : b}; - ctx.AddU1("{}=int({})>=0?int({})>int({}):int({})<int({});", *overflow, a, op_b, sub_a, - op_b, sub_a); - overflow->Invalidate(); - } } void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { From 97e80dda55aec0ff791e4990f09c85e7a7067730 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:32:09 -0300 Subject: [PATCH 712/770] shader: Add constant propagation to integer vectors --- .../ir_opt/constant_propagation_pass.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 796b4122d..3c72203ad 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -563,6 +563,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; + case IR::Opcode::CompositeExtractU32x2: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, + IR::Opcode::CompositeInsertU32x2); + case IR::Opcode::CompositeExtractU32x3: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, + IR::Opcode::CompositeInsertU32x3); + case IR::Opcode::CompositeExtractU32x4: + return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, + IR::Opcode::CompositeInsertU32x4); case IR::Opcode::CompositeExtractF32x2: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, IR::Opcode::CompositeInsertF32x2); From 04ef2160f9e164dbf7c2ab2f37de5533a8d5c450 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:32:47 -0300 Subject: [PATCH 713/770] shader: Teach global memory base tracker to follow vectors --- .../global_memory_to_storage_buffer_pass.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 1d11a00d8..70449eeca 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -253,12 +253,12 @@ struct LowAddrInfo { /// Tries to track the first 32-bits of a global memory instruction std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { // The first argument is the low level GPU pointer to the global memory instruction - const IR::U64 addr{inst->Arg(0)}; + const IR::Value addr{inst->Arg(0)}; if (addr.IsImmediate()) { // Not much we can do if it's an immediate return std::nullopt; } - // This address is expected to either be a PackUint2x32 or a IAdd64 + // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { @@ -274,25 +274,24 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { if (iadd_addr.IsImmediate()) { return std::nullopt; } - addr_inst = iadd_addr.Inst(); + addr_inst = iadd_addr.InstRecursive(); } - // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { - return std::nullopt; + // With IAdd64 handled, now PackUint2x32 is expected + if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { + // PackUint2x32 is expected to be generated from a vector + const IR::Value vector{addr_inst->Arg(0)}; + if (vector.IsImmediate()) { + return std::nullopt; + } + addr_inst =
vector.InstRecursive(); } - // PackUint2x32 is expected to be generated from a vector - const IR::Value vector{addr_inst->Arg(0)}; - if (vector.IsImmediate()) { - return std::nullopt; - } - // This vector is expected to be a CompositeConstructU32x2 - IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { + // The vector is expected to be a CompositeConstructU32x2 + if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ - .value{IR::U32{vector_inst->Arg(0)}}, + .value{IR::U32{addr_inst->Arg(0)}}, .imm_offset = imm_offset, }; } From d8d5501459d6c8b4c39307d293b0f40834dce8f3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:33:42 -0300 Subject: [PATCH 714/770] shader: Add int64 to int32 lowering pass --- src/shader_recompiler/CMakeLists.txt | 1 + .../ir_opt/lower_int64_to_int32.cpp | 216 ++++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + 3 files changed, 218 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f6719ad9d..3b5708cb9 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -219,6 +219,7 @@ add_library(shader_recompiler STATIC ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/lower_fp16_to_fp32.cpp + ir_opt/lower_int64_to_int32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/texture_pass.cpp diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..787a64f93 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -0,0 +1,216 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Optimization { +namespace { +std::pair Unpack(IR::IREmitter& ir, const IR::Value& packed) { + if (packed.IsImmediate()) { + const u64 value{packed.U64()}; + return { + ir.Imm32(static_cast(value)), + ir.Imm32(static_cast(value >> 32)), + }; + } else { + return std::pair{ + ir.CompositeExtract(packed, 0u), + ir.CompositeExtract(packed, 1u), + }; + } +} + +void IAdd64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("IAdd64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ISub64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ISub64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; + const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; + + const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; + const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; + const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; + + const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void INeg64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("INeg64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + lo = ir.BitwiseNot(lo); + hi = ir.BitwiseNot(hi); + + lo = ir.IAdd(lo, ir.Imm32(1)); + + const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; + hi = ir.IAdd(hi, carry); + + inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); +} + +void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_lo{ir.Imm32(0)}; + const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)}; + const IR::U32 short_ret_lo{shifted_lo}; + const IR::U32 
short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{ir.Imm32(0)}; + const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { + if (inst.HasAssociatedPseudoOperation()) { + throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); + } + IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); + const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; + const IR::U32 shift{inst.Arg(1)}; + + const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; + const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; + + const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; + + const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; + const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; + const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; + + const IR::U32 long_ret_hi{sign_extension}; + const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; + + const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; + const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift)); + const IR::U32 short_ret_hi{shifted_hi}; + const IR::U32 short_ret_lo{ + ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; + + const IR::U32 zero_ret_lo{lo}; + const IR::U32 zero_ret_hi{hi}; + + const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; + const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; + + const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; + const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; + 
inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); +} + +void Lower(IR::Block& block, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::PackUint2x32: + case IR::Opcode::UnpackUint2x32: + return inst.ReplaceOpcode(IR::Opcode::Identity); + case IR::Opcode::IAdd64: + return IAdd64To32(block, inst); + case IR::Opcode::ISub64: + return ISub64To32(block, inst); + case IR::Opcode::INeg64: + return INeg64To32(block, inst); + case IR::Opcode::ShiftLeftLogical64: + return ShiftLeftLogical64To32(block, inst); + case IR::Opcode::ShiftRightLogical64: + return ShiftRightLogical64To32(block, inst); + case IR::Opcode::ShiftRightArithmetic64: + return ShiftRightArithmetic64To32(block, inst); + default: + break; + } +} +} // Anonymous namespace + +void LowerInt64ToInt32(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Inst& inst : block->Instructions()) { + Lower(*block, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 5ebde49ea..2f89b1ea0 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -18,6 +18,7 @@ void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); +void LowerInt64ToInt32(IR::Program& program); void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Program& program); From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: [PATCH 715/770] shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. 
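
The lowering added in the previous commit follows textbook multi-word arithmetic: a 64-bit value is carried as a lo/hi pair of 32-bit words, and the carry out of the low word feeds the high word, just as IAdd64To32 chains IAdd with GetCarryFromOp. Below is a plain C++ sketch of that decomposition; the U64Pair type and Add64As32 helper are assumptions made up for this example, not emulator code:

    #include <cstdint>
    #include <cstdio>

    struct U64Pair {
        uint32_t lo;
        uint32_t hi;
    };

    // 64-bit addition performed with 32-bit operations only.
    static U64Pair Add64As32(U64Pair a, U64Pair b) {
        U64Pair r;
        r.lo = a.lo + b.lo;                           // wraps modulo 2^32
        const uint32_t carry = r.lo < a.lo ? 1u : 0u; // unsigned wrap implies a carry out
        r.hi = a.hi + b.hi + carry;
        return r;
    }

    int main() {
        const U64Pair a{0xffffffffu, 0x00000000u};
        const U64Pair b{0x00000001u, 0x00000000u};
        const U64Pair r = Add64As32(a, b); // expect hi=00000001 lo=00000000
        std::printf("hi=%08x lo=%08x\n", r.hi, r.lo);
    }

Because the pass rewrites PackUint2x32 and UnpackUint2x32 to Identity, the rest of the IR operates on the two-word pair transparently.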
--- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 6 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Thu, 24 Jun 2021 00:19:31 -0400 Subject: [PATCH 716/770] lower_int64_to_int32: Add missing include --- src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index 787a64f93..abf7c87c7 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { namespace { From 395bed3a0af90a53be44e81eadd06f4931c8e933 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: [PATCH 717/770] shader: Unify shader stage types --- src/shader_recompiler/stage.h | 11 ++++++++-- src/video_core/engines/kepler_compute.cpp | 1 - src/video_core/engines/maxwell_3d.cpp | 1 - src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 21 ------------------- src/video_core/renderer_opengl/gl_device.cpp | 18 +++++++++------- src/video_core/renderer_opengl/gl_device.h | 11 +++++----- .../renderer_opengl/gl_rasterizer.cpp | 2 -- .../renderer_opengl/gl_shader_cache.cpp | 1 - .../renderer_opengl/gl_shader_cache.h | 1 - .../renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++------- .../renderer_vulkan/maxwell_to_vk.h | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/shader_environment.cpp | 2 +- 15 files changed, 37 insertions(+), 55 deletions(-) delete mode 100644 src/video_core/engines/shader_type.h diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index 7d4f2c0bb..5c1c8d8fc 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -9,13 +9,20 @@ namespace Shader { enum class Stage : u32 { - Compute, - VertexA, VertexB, TessellationControl, TessellationEval, Geometry, Fragment, + + Compute, + + VertexA, }; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast(static_cast(Stage::VertexB) + index); +} } // namespace Shader diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index cae93c470..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include 
"video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 103a51fd0..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d5790f6..fc2c36c6b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,7 +20,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b1b5ba1ab..27be347e6 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -59,16 +60,18 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -std::array BuildMaxUniformBuffers() noexcept { - std::array max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger(pname); }); +std::array BuildMaxUniformBuffers() noexcept { + std::array max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger); return max; } bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -84,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, 
GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0bd277d38..ad7b01b06 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,7 +6,7 @@ #include #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" namespace OpenGL { @@ -16,8 +16,8 @@ public: [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast(shader_type)]; + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast(stage)]; } size_t GetUniformBufferAlignment() const { @@ -148,8 +148,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array max_uniform_buffers{}; + std::array max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -181,6 +180,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e3d336f86..0f0d780b5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f2f18b18a..5af9b7745 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -26,7 +26,6 @@ #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16873fcec..9d5306293 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,7 +17,6 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include 
"video_core/renderer_opengl/gl_shader_context.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const 
float width = src.scale_x * 2.0f; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 6243cd176..d463e2b56 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 3; +constexpr u32 CACHE_VERSION = 4; constexpr size_t INST_SIZE = sizeof(u64); From ecd6b4356b3a12c0963c4fd6ec6d273a7064ac2f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 03:31:49 -0300 Subject: [PATCH 718/770] shader: Only verify shader when graphics debugging is enabled --- .../frontend/maxwell/translate_program.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c084f3400..a8b727f1a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -7,6 +7,7 @@ #include #include +#include "common/settings.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" @@ -164,7 +165,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: [PATCH 719/770] vk_graphics_pipeline: Implement conservative rendering --- src/video_core/engines/maxwell_3d.h | 7 +++- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++----- .../vulkan_common/vulkan_device.cpp | 6 ++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fc2c36c6b..da2ded671 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -910,7 +910,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array vertex_attrib_format; @@ -1615,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 
1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? 
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9d918de8d..7b184d2f8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; if (ext_provoking_vertex) { provoking_vertex = { @@ -776,6 +780,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 40d00a52f..a9c0a0e4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -264,6 +264,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { return ext_provoking_vertex; @@ -374,6 +379,7 @@ private: bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: [PATCH 720/770] shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. 
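As an aside for reviewers, a simplified sketch of the new unified state, reconstructed purely from its call sites in the hunks below (Set, operator[], AnyComponent, Generic, ClipDistances, and the raw mask used for OR-merging); the bitset width and slot offsets are assumptions for illustration, not the exact values of the new varying_state.h:

    #include <bitset>
    #include <cstddef>

    // Sketch: one bit per attribute slot, with convenience queries on top.
    // The real helper is indexed by the IR::Attribute enum; plain size_t is
    // used here to keep the example self-contained.
    struct VaryingStateSketch {
        std::bitset<256> mask; // width assumed

        void Set(std::size_t attribute, bool value = true) {
            mask[attribute] = value;
        }
        bool operator[](std::size_t attribute) const noexcept {
            return mask[attribute];
        }
        // True if any of the four components of the vec4 starting at 'base' is used
        bool AnyComponent(std::size_t base) const noexcept {
            return mask[base] || mask[base + 1] || mask[base + 2] || mask[base + 3];
        }
        // Generic vectors occupy four consecutive slots from an assumed base
        static constexpr std::size_t GENERIC_BASE = 64; // placeholder for Generic0X
        bool Generic(std::size_t index) const noexcept {
            return AnyComponent(GENERIC_BASE + index * 4);
        }
        static constexpr std::size_t CLIP_BASE = 32; // placeholder for ClipDistance0
        bool ClipDistances() const noexcept {
            for (std::size_t i = 0; i < 8; ++i) {
                if (mask[CLIP_BASE + i]) {
                    return true;
                }
            }
            return false;
        }
    };

Collapsing everything into one mask is what makes the rest of the patch mechanical: merging dual-vertex programs reduces to loads.mask |= vertex_b.info.loads.mask, and the SPIR-V backend folds the passthrough attributes into the inputs with VaryingState{info.loads.mask | info.passthrough.mask}.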
--- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- .../backend/glasm/emit_glasm.cpp | 6 +- .../glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++-- .../backend/glsl/emit_glsl.cpp | 2 +- .../glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 103 +++++---- .../backend/spirv/emit_context.h | 2 +- .../backend/spirv/emit_spirv.cpp | 19 +- .../spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 208 ++++++------------ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 ++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 ++++++ src/video_core/engines/maxwell_3d.h | 7 +- .../renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- .../vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 351 insertions(+), 337 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9..b5b7e5e83 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c..80dad9ff3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f130..2b96977b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248..02c9dc6d7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast(IR::Attribute::PositionX)}; if (IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < 
static_cast(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c009535..0d7f7bc3b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, 
InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? 
"[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..8deaf5760 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eee..16e2a8502 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa21..298881c7b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129..af4fb0c69 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + 
literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast(i))}; + const auto type{AttrTypes(*this, static_cast(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast(i)); + literals.push_back(base_attribute_value + static_cast(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id 
pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { - continue; - } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { - continue; - } + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { + continue; + } + if (!loads.Generic(index)) { + continue; + } if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case 
Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb8..e277bc358 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8..ddb86d070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if (program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } 
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd72389..77fbb2b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08..8369d0d84 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool 
IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - 
info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 
+387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); - } - info.loads_position |= header.vtg.imap_systemb.position != 0; - } - if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, 
header.vtg.vertex_id != 0); + // TODO: Legacy varyings + } + if (info.stores_indexed_attributes) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be1638..ee1887b56 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aa..bd6c2bfb5 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - 
BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + std::array<u8, 16> imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array<u8, 16> omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afaf..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array<AttributeType, 32> generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e..4ef4dbd40 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include #include @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array uses_patches{}; - std::array<InputVarying, 32> input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array<Interpolation, 32> interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool
loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> +#include <cstddef> + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast<size_t>(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast<size_t>(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || + mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && + mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array<u32, 8> gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c);
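// A minimal standalone sketch (not part of this patch) of how the VaryingState
// helper defined above replaces the removed loads_*/stores_* booleans: every
// IR::Attribute owns one bit in the 256-bit mask, so queries become mask tests.
#include <cassert>
#include "shader_recompiler/varying_state.h"

static void VaryingStateUsageSketch() {
    Shader::VaryingState stores;
    // Old: stores_position = true; new: set all four position components.
    stores.Set(Shader::IR::Attribute::PositionX);
    stores.Set(Shader::IR::Attribute::PositionY);
    stores.Set(Shader::IR::Attribute::PositionZ);
    stores.Set(Shader::IR::Attribute::PositionW);
    assert(stores.AnyComponent(Shader::IR::Attribute::PositionX));
    assert(stores.AllComponents(Shader::IR::Attribute::PositionX));
    // Old: stores_generics[3] = true; new: per-component granularity.
    stores.Set(Shader::IR::Attribute::Generic3X);
    assert(stores.Generic(3));     // any component of generic attribute 3
    assert(!stores.Generic(3, 1)); // component Y was never written
}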
ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if 
(previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice 
physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. From 8a3427a4c857aa08e365d1776d1f0d9f32639c9c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:40:24 -0300 Subject: [PATCH 721/770] glasm: Add passthrough geometry shader support --- .../backend/glasm/emit_context.cpp | 5 ++-- .../backend/glasm/emit_glasm.cpp | 28 +++++++++++++++---- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ .../renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 80dad9ff3..069c019ad 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,13 +83,14 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { - if (info.loads.Generic(index)) { + if (loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2b96977b3..64787b353 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -304,6 +304,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_viewport_array2;"; } } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } @@ -410,11 +413,26 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I runtime_info.tess_clockwise ? "CW" : "CCW"); break; case Stage::Geometry: - header += fmt::format("PRIMITIVE_IN {};" - "PRIMITIVE_OUT {};" - "VERTICES_OUT {};", - InputPrimitive(runtime_info.input_topology), - OutputPrimitive(program.output_topology), program.output_vertices); + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } break; case Stage::Compute: header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 27be347e6..6818951f2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad7b01b06..45ddf5e01 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,10 @@ public: return has_depth_buffer_float; } + bool 
HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + bool HasNvGpuShader5() const { return has_nv_gpu_shader_5; } @@ -174,6 +178,7 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; bool has_nv_gpu_shader_5{}; bool has_shader_int64{}; bool has_amd_shader_half_float{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 06e39a503..af8e9f44d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,7 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), - .support_geometry_shader_passthrough = false, // TODO + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), From 8612b5fec5d39b904f9fddbbee3e06437d49429c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:42:07 -0300 Subject: [PATCH 722/770] shader: Use std::bit_cast instead of Common::BitCast for passthrough --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 6b4b0ce5b..2bb1d24a4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
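// A standalone illustration (an assumption for this document, not part of the
// patch) of the conversion this commit moves to std::bit_cast: the eight 32-bit
// passthrough mask words are reinterpreted as one 256-bit set matching
// VaryingState::mask. std::bit_cast requires source and destination to have the
// same size, which the static_assert documents; the complement mirrors the hunk
// below, where the bitwise NOT implies set register bits mark attributes that
// are not passed through.
#include <array>
#include <bit>
#include <bitset>
#include "common/common_types.h"

inline std::bitset<256> PassthroughAttributeBits(const std::array<u32, 8>& mask) {
    static_assert(sizeof(mask) == sizeof(std::bitset<256>), "layout assumption");
    return ~std::bit_cast<std::bitset<256>>(mask);
}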
#include <algorithm> +#include <bit> #include <memory> #include <vector> @@ -142,8 +143,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block - const auto mask{env.GpPassthroughMask()}; - program.info.passthrough.mask |= ~Common::BitCast<std::bitset<256>>(mask); + const auto& mask{env.GpPassthroughMask()}; + program.info.passthrough.mask |= ~std::bit_cast<std::bitset<256>>(mask); } break; } From f7352411f08c3a099b753d290540bb7c02fecac3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 24 Jun 2021 20:13:21 -0400 Subject: [PATCH 723/770] glsl: Add passthrough geometry shader support --- .../backend/glsl/emit_context.cpp | 30 ++++++++++++++----- .../backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_special.cpp | 3 ++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0d7f7bc3b..36527bbd4 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -236,6 +236,9 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { if (!StoresPerVertexAttributes(ctx.stage)) { return; } + if (ctx.uses_geometry_passthrough) { + return; + } header += "out gl_PerVertex{vec4 gl_Position;"; if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; @@ -272,12 +275,13 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) - : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, + uses_geometry_passthrough{program.is_geometry_passthrough && + profile.support_geometry_shader_passthrough} { if (profile.need_fastmath_off) { header += "#pragma optionNV(fastmath off)\n"; } SetupExtensions(); - stage = program.stage; switch (program.stage) { case Stage::VertexA: case Stage::VertexB: @@ -295,10 +299,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; case Stage::Geometry: stage_name = "gs"; - header += fmt::format("layout({})in;layout({},max_vertices={})out;" "in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", - InputPrimitive(runtime_info.input_topology), - OutputPrimitive(program.output_topology), program.output_vertices); + header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); + if (uses_geometry_passthrough) { + header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; + break; + } else if (program.is_geometry_passthrough && + !profile.support_geometry_shader_passthrough) { + LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); + } + header += fmt::format( + "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", + OutputPrimitive(program.output_topology), program.output_vertices); break; case Stage::Fragment: stage_name = "fs"; @@ -329,7 +340,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + const auto qualifier{uses_geometry_passthrough ?
"passthrough" + : fmt::format("location={}", index)}; + header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, InterpDecorator(info.interpolation[index]), index, InputArrayDecorator(stage)); } @@ -412,6 +425,9 @@ void EmitContext::SetupExtensions() { if (info.uses_derivatives && profile.support_gl_derivative_control) { header += "#extension GL_ARB_derivative_control : enable\n"; } + if (uses_geometry_passthrough) { + header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index ecdf6e5bc..dd7397489 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -157,6 +157,7 @@ public: bool uses_y_direction{}; bool uses_cc_carry{}; + bool uses_geometry_passthrough{}; private: void SetupExtensions(); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 298881c7b..9b866f889 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -17,6 +17,9 @@ std::string_view OutputVertexIndex(EmitContext& ctx) { } void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.uses_geometry_passthrough) { + return; + } if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } From 00fa09dc45936005b2c27d858ab70213c7e1482d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 24 Jun 2021 20:14:31 -0400 Subject: [PATCH 724/770] glsl: Declare local memory in main --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 8deaf5760..46d72963d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -217,14 +217,14 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR EmitCode(ctx, program); const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); - if (program.local_memory_size > 0) { - ctx.header += fmt::format("uint lmem[{}];", Common::AlignUp(program.local_memory_size, 4)); - } if (program.shared_memory_size > 0) { ctx.header += fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); } ctx.header += "void main(){\n"; + if (program.local_memory_size > 0) { + ctx.header += fmt::format("uint lmem[{}];", Common::AlignUp(program.local_memory_size, 4)); + } DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { ctx.header += "uint carry;"; From 1152d66ddd4e7b29b53e01990fef77e4cff20e24 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:28:48 -0400 Subject: [PATCH 725/770] general: Add setting shader_backend GLASM is getting good enough that we can move it out of advanced graphics settings. This removes the setting `use_assembly_shaders`, opting for a enum class `shader_backend`. This comes with the benefits that it is extensible for additional shader backends besides GLSL and GLASM, and this will work better with a QComboBox. 
Qt removes the related assembly shader setting from the Advanced Graphics section and places it as a new QComboBox in the API Settings group. This will replace the Vulkan device selector when OpenGL is selected. Additionally, mark all of the custom anisotropic filtering settings as "WILL BREAK THINGS", as that is the case with a select few games. --- src/common/settings.cpp | 4 +- src/common/settings.h | 8 +- src/core/telemetry_session.cpp | 4 +- src/video_core/renderer_opengl/gl_device.cpp | 10 +- src/yuzu/configuration/config.cpp | 7 +- src/yuzu/configuration/config.h | 3 +- src/yuzu/configuration/configure_graphics.cpp | 76 +++++++---- src/yuzu/configuration/configure_graphics.h | 4 +- src/yuzu/configuration/configure_graphics.ui | 124 ++++++++++++++---- .../configure_graphics_advanced.cpp | 7 - .../configure_graphics_advanced.h | 1 - .../configure_graphics_advanced.ui | 18 +-- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 7 +- 14 files changed, 185 insertions(+), 90 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ac0590690..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -334,7 +340,7 @@ struct Values { Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicSetting disable_fps_limit{false, "disable_fps_limit"}; - Setting use_assembly_shaders{false, "use_assembly_shaders"}; + Setting shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting use_caches_gc{false, "use_caches_gc"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", 
Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6818951f2..c4eeed53b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,9 +172,10 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && - GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_assembly_shaders = + Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && @@ -187,7 +188,8 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { + if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index dc69574a9..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -1345,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast(Settings::values.shader_backend.GetValue(global)), + static_cast(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ private: // These metatype declarations cannot be in common/settings.h 
because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..463448dbf 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,25 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -48,6 +54,10 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) UpdateBackgroundColorButton(new_bg_color); }); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + ui->bg_label->setVisible(Settings::IsConfiguringGlobal()); ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal()); } @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void 
ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. 
- enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ - + 0 @@ -40,37 +40,107 @@ 6 + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Shader Backend: + + + + + + + + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Device: + + + + + + + + + - - - API: - - - - - - - - OpenGL + + + + 0 - - - - Vulkan + + 0 - + + 0 + + + 0 + + + + + API: + + + + + + + + 0 + 0 + + + + + OpenGL + + + + + Vulkan + + + + + - - - - Device: - - - - - - diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); 
ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..772e5fed3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -76,16 +76,6 @@ - - - - Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental. - - - Use assembly shaders (experimental, Nvidia OpenGL only) - - - @@ -144,22 +134,22 @@ - 2x + 2x (WILL BREAK THINGS) - 4x + 4x (WILL BREAK THINGS) - 8x + 8x (WILL BREAK THINGS) - 16x + 16x (WILL BREAK THINGS) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 763df6dd6..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -458,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a6ca7b6cd..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -248,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. 
# 0 (default): Off, 1: On From fb9b1787f86d069db27fe0af44ded042c6d8de39 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 22 Jun 2021 01:12:11 -0400 Subject: [PATCH 726/770] video_core: Enable GL SPIR-V shaders --- .../renderer_opengl/gl_compute_pipeline.cpp | 17 +++-- .../renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 8 ++- src/video_core/renderer_opengl/gl_device.h | 11 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 64 ++++++++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 40 +++++++++--- 7 files changed, 105 insertions(+), 38 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 2d6442d74..c63e87a56 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -5,6 +5,7 @@ #include #include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -40,15 +41,23 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code) + std::string code, std::vector code_v) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { - if (device.UseAssemblyShaders()) { - assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); + LinkProgram(source_program.handle); + break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5fc45f26..50c676365 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code); + std::string code, std::vector code_v); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c4eeed53b..99f8769fc 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -177,6 +177,11 @@ Device::Device() { GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + shader_backend = 
(Settings::values.shader_backend.GetValue() == + Settings::ShaderBackend::GLASM) == use_assembly_shaders + ? Settings::values.shader_backend.GetValue() + : Settings::ShaderBackend::GLSL; + // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -188,8 +193,7 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - !use_assembly_shaders) { + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 45ddf5e01..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -8,6 +8,10 @@ #include "common/common_types.h" #include "shader_recompiler/stage.h" +namespace Settings { +enum class ShaderBackend : u32; +}; + namespace OpenGL { class Device { @@ -148,6 +152,10 @@ public: return need_fastmath_off; } + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -159,6 +167,9 @@ private: u32 max_varyings{}; u32 max_compute_shared_memory_size{}; u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a93b03cf7..1f19b5825 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,7 +3,11 @@ // Refer to the license.txt file included. 
#include +#include +#include +#include +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -179,7 +183,8 @@ GraphicsPipeline::GraphicsPipeline( Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, - const std::array& infos, const GraphicsPipelineKey& key_) + std::array, 5> sources_spirv, const std::array& infos, + const GraphicsPipelineKey& key_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { @@ -232,29 +237,44 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; + auto func{ + [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); } - if (device.UseAssemblyShaders()) { - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } else { - AttachShader(Stage(stage), program.handle, code); + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + case Settings::ShaderBackend::GLASM: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + } break; + case Settings::ShaderBackend::SPIRV: { + const auto code{sources_spirv[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + } } - } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built.store(true, std::memory_order_relaxed); + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index f82d712f8..5f5d57385 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,6 +78,7 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, + std::array, 5> sources_spirv, const std::array& infos, const GraphicsPipelineKey& key_); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index af8e9f44d..cde0f54c9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -15,6 +15,7 @@ #include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" #include "common/thread_worker.h" #include "core/core.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -415,6 +416,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; + std::array, 5> sources_spirv; Shader::Backend::Bindings binding; Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; @@ -431,17 +433,23 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; - if (use_glasm) { - sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; } previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, key); + return std::make_unique( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -478,10 +486,24 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - const std::string code{device.UseAssemblyShaders() ? EmitGLASM(profile, info, program) - : EmitGLSL(profile, program)}; + + std::string code{}; + std::vector code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, code); + kepler_compute, program_manager, program.info, code, + code_spirv); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; From 5b2b0634a1e6b9fd30999d052255669cbf9c5ef6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 04:38:55 -0300 Subject: [PATCH 727/770] spirv: Fix code emission when descriptor aliasing is unsupported Fixes OpenGL. 
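When the host cannot alias descriptors, every constant buffer must be exposed through a single untyped u32x4 view, and the typed (u8/u16/u32/...) definitions must not be emitted for the same bindings. A minimal sketch of the intended control flow, using the names from the diff below:

    if (!profile.support_descriptor_aliasing) {
        // Define one untyped u32x4 view per constant buffer and advance the binding...
        DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u',
                           sizeof(u32[4]));
        for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
            binding += desc.count;
        }
        // ...and return early so the typed aliases below are never emitted.
        return;
    }

Previously the function fell through after emitting the untyped views, so the typed definitions also ran and aliased bindings on hosts that cannot alias them; the OpenGL SPIR-V path is presumably where this surfaced.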
--- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index af4fb0c69..0926dcf14 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -911,14 +911,15 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } - IR::Type types{info.used_constant_buffer_types}; if (!profile.support_descriptor_aliasing) { DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', sizeof(u32[4])); for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; } + return; } + IR::Type types{info.used_constant_buffer_types}; if (True(types & IR::Type::U8)) { if (profile.support_int8) { DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); From 57a8921e01a90ff5993079dd638a6c48e5781756 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:21:51 -0300 Subject: [PATCH 728/770] vk_graphics_pipeline: Implement line width --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 1 + .../renderer_vulkan/vk_state_tracker.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 7 ++++++- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 +++++ 8 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6d664ed6b..3363a6877 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -705,11 +705,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f04c3394c..bb7301c53 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -541,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -676,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? 
regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c954fa7f8..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -125,6 +125,7 @@ private: void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 0ebe0473f..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,10 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, - DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, - DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, - StencilOp, StencilTestEnable, VertexBuffers, VertexInput, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -86,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -180,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1976b7e9b..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -31,6 +31,7 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -44,7 +45,7 @@ enum : u8 { Blending, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits::max()); @@ -93,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index da4721e6b..912e03c5c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ 
-227,7 +227,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthBiasClamp = true, .fillModeNonSolid = true, .depthBounds = is_depth_bounds_supported, - .wideLines = false, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -703,7 +703,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -712,6 +711,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d7e9fac22..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -121,6 +121,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d43b606f1..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -231,6 +231,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetScissor vkCmdSetScissor{}; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; @@ -1198,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } From f94f0be5215369a6985247ad936d9d9f43c9b140 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:25:19 -0300 Subject: [PATCH 729/770] vk_graphics_pipeline: Implement smooth lines --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++++++++++ .../vulkan_common/vulkan_device.cpp | 41 ++++++++++++++++--- src/video_core/vulkan_common/vulkan_device.h | 6 +++ 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 7563dc462..d089da8a4 100644 --- 
a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -88,6 +88,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 66b57b636..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -195,6 +195,7 @@ struct FixedPipelineState { BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3363a6877..f0ae0b0d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -80,6 +80,14 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); } +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) != line_topologies.end(); +} + VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { union Swizzle { u32 raw; @@ -616,6 +624,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -632,6 +650,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { ?
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 912e03c5c..4fa1470e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,6 +416,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + if (!ext_conservative_rasterization) { LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); } @@ -757,6 +774,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -798,6 +816,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -918,17 +937,29 @@ std::vector Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if (line_raster.rectangularLines && 
line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } if (has_khr_workgroup_memory_explicit_layout) { VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; layout.sType = diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d0adc0127..26100166f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -259,6 +259,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { return ext_vertex_input_dynamic_state; @@ -382,6 +387,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. From 8289eb108fefa9bfbb445c9f6b3f423a5d0eb771 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 25 Jun 2021 13:09:45 -0400 Subject: [PATCH 730/770] opengl: Implement LOP.CC Used by MH:Rise --- .../backend/glasm/emit_glasm_integer.cpp | 30 +++++++++++++++++-- .../backend/glsl/emit_glsl_integer.cpp | 14 +++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 587bbfe06..f55c26b76 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -7,6 +7,30 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLASM { +namespace { +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, + std::string_view lop) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } + const auto ret{ctx.reg_alloc.Define(inst)}; + ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); + if (zero) { + ctx.Add("SEQ.S {},{},0;", *zero, ret); + } + if (sign) { + ctx.Add("SLT.S {},{},0;", *sign, ret); + } +} +} // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { const std::array flags{ @@ -110,15 +134,15 @@ void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister } void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { - ctx.Add("AND.S {}.x,{},{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, "AND"); } void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { - ctx.Add("OR.S {}.x,{},{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, "OR"); } void EmitBitwiseXor32(EmitContext& ctx, 
IR::Inst& inst, ScalarS32 a, ScalarS32 b) { - ctx.Add("XOR.S {}.x,{},{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, "XOR"); } void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 2892074e1..38419f88f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -27,6 +27,14 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { ctx.AddU1("{}=int({})<0;", *sign, result); sign->Invalidate(); } + +void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, + char lop) { + const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; + ctx.Add("{}={}{}{};", result, a, lop, b); + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); +} } // Anonymous namespace void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { @@ -113,15 +121,15 @@ void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_vi } void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}={}&{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, '&'); } void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}={}|{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, '|'); } void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { - ctx.AddU32("{}={}^{};", inst, a, b); + BitwiseLogicalOp(ctx, inst, a, b, '^'); } void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, From 65daec8b75dafb96296c6066db9c1d696948e7fe Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 25 Jun 2021 16:34:40 -0400 Subject: [PATCH 731/770] glsl: Fix shared and local memory declarations account for the fact that program.*memory_size is in units of bytes. 
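The smem and lmem arrays are declared as 32-bit uints, so the element count is the byte size divided by four and rounded up, which is what DivCeil computes. AlignUp instead rounds the byte count itself up to a multiple of four, over-allocating by roughly 4x. A quick worked example, assuming a 10-byte shared memory size:

    // Array elements are 4-byte uints; program.*memory_size is in bytes.
    Common::AlignUp(10u, 4u); // == 12 -> "shared uint smem[12];" = 48 bytes (4x too large)
    Common::DivCeil(10u, 4u); // == 3  -> "shared uint smem[3];"  = 12 bytes (correct)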
--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 46d72963d..ffdc6dbba 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -5,7 +5,7 @@ #include #include -#include "common/alignment.h" +#include "common/div_ceil.h" #include "common/settings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" @@ -219,11 +219,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header.insert(0, version); if (program.shared_memory_size > 0) { ctx.header += - fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); + fmt::format("shared uint smem[{}];", Common::DivCeil(program.shared_memory_size, 4U)); } ctx.header += "void main(){\n"; if (program.local_memory_size > 0) { - ctx.header += fmt::format("uint lmem[{}];", Common::AlignUp(program.local_memory_size, 4)); + ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); } DefineVariables(ctx, ctx.header); if (ctx.uses_cc_carry) { From 5643a909bc3fa9f497d2f2e68650f823ed2944ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 01:14:06 -0300 Subject: [PATCH 732/770] shader: Fix disabled and unwritten attributes and varyings --- .../glsl/emit_glsl_context_get_set.cpp | 8 +++-- .../spirv/emit_spirv_context_get_set.cpp | 6 +++- .../renderer_opengl/renderer_opengl.cpp | 35 +++++++++++-------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 16e2a8502..d5424301b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,8 +179,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { - ctx.AddF32("{}=0.f;", inst, attr); + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } return; } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 77fbb2b2f..756de0a27 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,10 +298,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { + if (!type) { // Attribute is disabled return ctx.Const(0.0f); } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 
1.0f : 0.0f); + } const Id generic_id{ctx.input_generics.at(index)}; const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; const Id value{ctx.OpLoad(type->id, pointer)}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b8777643b..dab0afe6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -256,21 +276,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: [PATCH 733/770] vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Work around a potential bug in Nvidia's driver where updating only the high attributes leaves the low attributes outdated.
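The workaround first scans all attribute dirty flags and records the highest dirty index, then rebuilds the attribute descriptions for the whole range from attribute 0 upward instead of touching only the dirty ones. A simplified sketch of the tracking pass, using the names from the diff below:

    size_t highest_dirty_attr{};
    for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
        if (dirty[Dirty::VertexAttribute0 + index]) {
            highest_dirty_attr = index;
        }
    }
    // A second pass then pushes a VkVertexInputAttributeDescription2EXT for each
    // attribute in that range, omitting constant attributes entirely.

As a side effect, constant attributes no longer need the fake VK_FORMAT_A8B8G8R8_UNORM_PACK32 format; they are simply left out of the dynamic vertex input state.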
--- src/video_core/engines/maxwell_3d.h | 4 --- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++-------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 471d5686a..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -305,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0f0d780b5..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? 
VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { From b21bf79bd2627797d87c17f30c776b4e2476f019 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 28 Jun 2021 22:35:31 -0300 Subject: [PATCH 734/770] shader: Only apply shift on register mode for IADD3 --- .../impl/integer_add_three_input.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 33e2a51ae..b50017536 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -22,31 +22,33 @@ enum class Half : u64 { [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { constexpr bool is_signed{false}; switch (half) { + case Half::All: + return value; case Half::Lower: return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); case Half::Upper: return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); - default: - return value; } + throw NotImplementedException("Invalid half"); } [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { switch (shift) { + case Shift::None: + return value; case Shift::Right: return ir.ShiftRightLogical(value, ir.Imm32(16)); case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); - default: - return value; } + throw NotImplementedException("Invalid shift"); } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; BitField<49, 1, u64> neg_c; @@ -68,7 +70,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } - const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); @@ -89,14 +91,16 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o void TranslatorVisitor::IADD3_reg(u64 insn) { union { u64 insn; + BitField<37, 2, Shift> shift; BitField<35, 2, Half> half_a; - BitField<31, 2, Half> half_c; BitField<33, 2, Half> half_b; - } iadd3{insn}; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; - IADD3(*this, insn, op_a, op_b, op_c); + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); } void 
TranslatorVisitor::IADD3_cbuf(u64 insn) { From b9069c7891f2516ea037e9355daea284a1d540f1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Jun 2021 22:38:35 -0400 Subject: [PATCH 735/770] shader: Account for 33-bit IADD3 scenario --- .../translate/impl/integer_add_three_input.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index b50017536..040cfc10f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -36,8 +36,12 @@ enum class Half : u64 { switch (shift) { case Shift::None: return value; - case Shift::Right: - return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); } @@ -67,6 +71,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o } IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? + if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } From 2e5af95541adf581364ee3864be57f9b2b9a230f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 28 Jun 2021 23:44:03 -0400 Subject: [PATCH 736/770] shader: GCC fmt 8.0.0 fixes --- src/shader_recompiler/backend/glasm/emit_context.h | 8 +++++--- src/shader_recompiler/backend/glsl/emit_context.cpp | 2 +- src/shader_recompiler/backend/glsl/emit_context.h | 6 +++--- .../backend/glsl/emit_glsl_context_get_set.cpp | 4 ++-- src/shader_recompiler/backend/spirv/emit_context.cpp | 5 +++-- src/shader_recompiler/exception.h | 8 ++++---- .../frontend/maxwell/structured_control_flow.cpp | 2 +- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 9f86e55d3..1da51a996 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -37,21 +37,23 @@ public: template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { - code += fmt::format(format_str, reg_alloc.Define(inst), std::forward(args)...); + code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), + std::forward(args)...); // TODO: Remove this code += '\n'; } template void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { - code += fmt::format(format_str, reg_alloc.LongDefine(inst), std::forward(args)...); + code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), + std::forward(args)...); // TODO: Remove this code += '\n'; } template void Add(const char* format_str, Args&&... 
args) { - code += fmt::format(format_str, std::forward(args)...); + code += fmt::format(fmt::runtime(format_str), std::forward(args)...); // TODO: Remove this code += '\n'; } diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 36527bbd4..0dcdff152 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -597,7 +597,7 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { func += comparison; const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; - func += fmt::format(return_statement, ssbo_name, ssbo_addr); + func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); }}; std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index dd7397489..d9b639d29 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -51,9 +51,9 @@ public: const auto var_def{var_alloc.AddDefine(inst, type)}; if (var_def.empty()) { // skip assigment. - code += fmt::format(format_str + 3, std::forward(args)...); + code += fmt::format(fmt::runtime(format_str + 3), std::forward(args)...); } else { - code += fmt::format(format_str, var_def, std::forward(args)...); + code += fmt::format(fmt::runtime(format_str), var_def, std::forward(args)...); } // TODO: Remove this code += '\n'; @@ -131,7 +131,7 @@ public: template void Add(const char* format_str, Args&&... args) { - code += fmt::format(format_str, std::forward(args)...); + code += fmt::format(fmt::runtime(format_str), std::forward(args)...); // TODO: Remove this code += '\n'; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index d5424301b..580063fa9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -61,14 +61,14 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, bit_offset, num_bits)}; if (!component_indexing_bug) { - const auto result{fmt::format(extraction, swizzle)}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; ctx.Add("{}={};", ret, result); return; } const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; for (u32 i = 0; i < 4; ++i) { const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; - const auto result{fmt::format(extraction, swizzle_string)}; + const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0926dcf14..865f34291 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -244,8 +244,9 @@ std::string_view StageName(Stage stage) { template void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { - ctx.Name(object, - fmt::format(format_str, StageName(ctx.stage), std::forward(args)...).c_str()); + ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), + std::forward(args)...) 
+ .c_str()); } void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h index 43f08162d..337e7f0c8 100644 --- a/src/shader_recompiler/exception.h +++ b/src/shader_recompiler/exception.h @@ -37,21 +37,21 @@ class LogicError : public Exception { public: template LogicError(const char* message, Args&&... args) - : Exception{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} }; class RuntimeError : public Exception { public: template RuntimeError(const char* message, Args&&... args) - : Exception{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} }; class NotImplementedException : public Exception { public: template NotImplementedException(const char* message, Args&&... args) - : Exception{fmt::format(message, std::forward(args)...)} { + : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} { Append(" is not implemented"); } }; @@ -60,7 +60,7 @@ class InvalidArgument : public Exception { public: template InvalidArgument(const char* message, Args&&... args) - : Exception{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(fmt::runtime(message), std::forward(args)...)} {} }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 10d05dc4c..06fde0017 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -174,7 +174,7 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->block->begin, stmt->block->end, + stmt->block->begin.Offset(), stmt->block->end.Offset(), reinterpret_cast(stmt->block)); break; case StatementType::Goto: From 1b27a2b59798c7028ad533b5d92b2d9900860d65 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 29 Jun 2021 14:55:32 -0400 Subject: [PATCH 737/770] configure_graphics: Re-order vulkan device populating --- src/yuzu/configuration/configure_graphics.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 463448dbf..927b4233b 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,6 +26,10 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + ui->backend->addItem(QStringLiteral("GLSL")); ui->backend->addItem(tr("GLASM (NVIDIA Only)")); ui->backend->addItem(QStringLiteral("SPIR-V")); @@ -54,10 +58,6 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) UpdateBackgroundColorButton(new_bg_color); }); - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } - ui->bg_label->setVisible(Settings::IsConfiguringGlobal()); ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal()); } From 8722668b3c027f0132d0be07e867247debd08d30 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:42:17 -0300 Subject: [PATCH 738/770] emit_spirv: Workaround 
VK_KHR_shader_float_controls on fp16 Nvidia Fix regression on Fire Emblem: Three Houses when using native fp16. --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 13 ++++++++----- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ddb86d070..d7a86e270 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -319,7 +319,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -332,15 +332,15 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); } else { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } } - if (!profile.support_separate_denorm_behavior) { + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { // No separate denorm behavior return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -353,13 +353,16 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); } else { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); } } } void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ee1887b56..6ff12387b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -58,6 +58,8 @@ struct Profile { bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; /// 
The precise type qualifier is broken in the fragment stage of some drivers diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cde0f54c9..2ea9c9f07 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -206,6 +206,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7aaa40ef2..87b843e3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, .ignore_nan_fp_comparisons = false, }; host_info = Shader::HostTranslateInfo{ From 57171b23f9da0d9fa4b07bb77ba5c8ed0083a792 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:58:32 -0300 Subject: [PATCH 739/770] vulkan_device: Enable VK_EXT_extended_dynamic_state on RADV 21.2 onward --- src/video_core/vulkan_common/vulkan_device.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4fa1470e8..e297a3e92 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -511,10 +511,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } } if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); From dbee32d302a5944bc8e99b55d956013503b66c6c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 1 Jul 2021 20:32:30 -0400 Subject: [PATCH 740/770] gl_shader_cache: Fixes for async shaders --- .../renderer_opengl/gl_shader_cache.cpp | 27 ++++++++++++++++--- .../renderer_opengl/gl_shader_cache.h | 2 ++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2ea9c9f07..2d7eb3e33 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -328,11 +328,32 @@ 
GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { if (is_new) { pipeline = CreateGraphicsPipeline(); } - current_pipeline = pipeline.get(); - if (!pipeline || !pipeline->IsBuilt()) { + if (!pipeline) { return nullptr; } - return pipeline.get(); + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); +} + +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } ComputePipeline* ShaderCache::CurrentComputePipeline() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 9d5306293..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -54,6 +54,8 @@ public: private: GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( From 57f222c56e6facb623dccfe1abdc2bdeba8535ec Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 2 Jul 2021 12:45:23 -0400 Subject: [PATCH 741/770] dual_vertex_pass: Clang format --- .../ir_opt/dual_vertex_pass.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index a926123f2..3d2c205c2 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -14,23 +14,23 @@ namespace Shader::Optimization { void VertexATransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Epilogue) { - return inst.Invalidate(); - } - } - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Prologue) { - return inst.Invalidate(); - } - } - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization From 7277d7fe96d53ae2b73491d91e0a54caf0206fe7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 3 Jul 2021 01:49:59 -0400 Subject: [PATCH 742/770] vulkan_device: Blacklist ampere devices from float16 math --- .../vulkan_common/vulkan_device.cpp | 29 +++++++++++++------ src/video_core/vulkan_common/vulkan_device.h | 6 ++-- 2 files 
changed, 23 insertions(+), 12 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e297a3e92..7d66a43e7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -194,12 +194,22 @@ std::unordered_map GetFormatProperties(vk::Physica return format_properties; } +std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector supported_extensions(std::size(extensions)); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); @@ -510,6 +520,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { + if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != + supported_extensions.end()) { + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { // Mask driver version variant const u32 version = (properties.driverVersion << 3) >> 3; @@ -778,10 +795,10 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -1064,12 +1081,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 26100166f..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -301,7 +301,7 @@ public: /// Returns the list of available extensions. const std::vector& GetAvailableExtensions() const { - return reported_extensions; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -398,8 +398,8 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector reported_extensions; ///< Reported Vulkan extensions. 
+ std::string vendor_name; ///< Device's driver name. + std::vector supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. std::unordered_map format_properties; From 55233c2861a72bd777b75bce20c8d4e46c17a72f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 3 Jul 2021 03:07:50 -0400 Subject: [PATCH 743/770] vulkan_device: Add missing include algorithm --- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7d66a43e7..ceaee8a7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: [PATCH 744/770] shader: Ignore global memory ops on devices lacking int64 support --- .../backend/glsl/emit_context.cpp | 6 ++-- .../backend/glsl/emit_glsl_memory.cpp | 34 ++++++++++++++---- .../backend/spirv/emit_context.cpp | 2 +- .../backend/spirv/emit_spirv_memory.cpp | 36 +++++++++++++++---- src/shader_recompiler/frontend/ir/opcodes.inc | 28 +++++++-------- src/shader_recompiler/profile.h | 1 + .../renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 8 files changed, 79 insertions(+), 30 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0dcdff152..e08d2d2eb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -378,7 +378,7 @@ void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (info.uses_int64_bit_atomics) { @@ -402,7 +402,7 @@ void EmitContext::SetupExtensions() { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n" "#extension GL_ARB_shader_group_vote : enable\n"; - if (!info.uses_int64) { + if (!info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (profile.support_gl_warp_intrinsics) { @@ -539,7 +539,7 @@ void EmitContext::DefineHelperFunctions() { if (info.uses_atomic_s32_max) { header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } - if (info.uses_global_memory) { + if (info.uses_global_memory && profile.support_int64) { header += DefineGlobalMemoryFunctions(); } if (info.loads_indexed_attributes) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index daef5fb84..e3957491f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace 
Shader::Backend::GLSL { namespace { @@ -38,15 +39,27 @@ void EmitLoadGlobalS16(EmitContext&) { } void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32("{}=LoadGlobal32({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); } void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); } void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); } void EmitWriteGlobalU8(EmitContext&) { @@ -66,15 +79,24 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal32({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal64({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal128({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 865f34291..2d29d8c14 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -830,7 +830,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { - if (!info.uses_global_memory) { + if (!info.uses_global_memory || !profile.support_int64) { return; } using DefPtr = Id StorageDefinitions::*; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index ccebf170d..679ee2684 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -84,15 +84,27 @@ void EmitLoadGlobalS16(EmitContext&) { } Id EmitLoadGlobal32(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory 
operation"); + return ctx.Const(0u); } Id EmitLoadGlobal64(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); } Id EmitLoadGlobal128(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); } void EmitWriteGlobalU8(EmitContext&) { @@ -112,15 +124,27 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9af750283..d91098c80 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -71,20 +71,20 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) diff --git 
a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6ff12387b..501dcaf71 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,7 @@ struct Profile { bool support_descriptor_aliasing{}; bool support_int8{}; bool support_int16{}; + bool support_int64{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2d7eb3e33..58a4f0fb4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -168,6 +168,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_descriptor_aliasing = false, .support_int8 = false, .support_int16 = false, + .support_int64 = device.HasShaderInt64(), .support_vertex_instance_id = true, .support_float_controls = false, .support_separate_denorm_behavior = false, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87b843e3d..a2646fc6d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,6 +280,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_descriptor_aliasing = true, .support_int8 = true, .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == From 41c6cb70f909d7b223824f3879e0009521e9142f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 20:48:54 -0400 Subject: [PATCH 745/770] glsl: Fix tracking of info.uses_shadow_lod --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5e32ac784..5ead930f1 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -505,10 +505,6 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BoundImageGradient: - case IR::Opcode::ImageSampleImplicitLod: - case IR::Opcode::ImageSampleExplicitLod: - case IR::Opcode::ImageSampleDrefImplicitLod: - case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageFetch: @@ -520,6 +516,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageQueryLod: { const auto flags{inst.Flags()}; const TextureType type{flags.type}; From 18fb9bdfa85e27ee4cb888da2c964cf95ccf072e Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 6 Jul 2021 15:46:27 -0400 Subject: [PATCH 746/770] configure_graphics: Mark SPIR-V as Experimental, Mesa only --- src/yuzu/configuration/configure_graphics.cpp | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 927b4233b..fef211707 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -32,7 +32,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->backend->addItem(QStringLiteral("GLSL")); ui->backend->addItem(tr("GLASM (NVIDIA Only)")); - ui->backend->addItem(QStringLiteral("SPIR-V")); + ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)")); SetupPerGameUI(); From be54aad1c40bb50c71e7bcd6465c2fd372c11cb7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Jul 2021 00:18:30 -0300 Subject: [PATCH 747/770] maxwell_to_vk: Add R16_SNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f9b9a11a..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ceaee8a7e..13d938434 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -116,6 +116,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, From 8390286a89dd259f0ff44cc95fc20d017b58046f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 9 Jul 2021 19:00:11 -0400 Subject: [PATCH 748/770] renderers: Disable async shader compilation The current implementation is prone to causing graphical issues. Disable until a better solution is implemented. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 99f8769fc..563b291cd 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,9 +182,11 @@ Device::Device() { ? Settings::values.shader_backend.GetValue() : Settings::ShaderBackend::GLSL; + // Completely disable async shaders for now, as it causes graphical glitches + use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
- use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - !(is_amd || (is_intel && !is_linux)); + // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + // !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2646fc6d..39db35175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -269,7 +269,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_asynchronous_shaders{false}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; From 2235a51b5d987cf8297211bb1778d75e6b794324 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 01:10:38 -0300 Subject: [PATCH 749/770] shader: Manually convert from array to bitset instead of using bit_cast --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2bb1d24a4..83c77967d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. 
#include -#include #include #include #include @@ -144,7 +143,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } } break; } From 41493fbe89200a4a8321dec7b313872435c57df7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Jul 2021 01:04:52 -0400 Subject: [PATCH 750/770] renderers: Fix clang formatting --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index dab0afe6d..c9cfa6366 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,8 +24,8 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6fda06a7e..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0ae0b0d6..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -503,9 +503,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast<u32>(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 99576b826..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,7 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); From 4e4b8775b56a7f596e509aab597067815d005507 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Jul 2021 14:39:13 -0400 Subject: [PATCH 751/770] main: Update Shader Cache menu options This change adds two new context menu items to remove either the OpenGL or the Vulkan shader caches individually, and provides the option to remove all caches for the selected title. This also changes the behavior of the open shader cache option. Now it creates the shader cache directory for the title if it does not yet exist.
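For reference, the on-disk layout this change assumes is one directory per title under the shader directory, with one transferable cache file per backend. A minimal sketch follows; it is illustrative only and not part of the patch. GetYuzuPath, GameListRemoveTarget, "opengl.bin", and "vulkan.bin" are taken from the diff below, while ResolveShaderCachePath itself is a hypothetical helper.

    // Hypothetical helper (not in this patch): maps a removal target to the
    // path the new menu actions operate on. Assumes the layout
    // <ShaderDir>/<title_id as 16 hex digits>/{opengl.bin, vulkan.bin}.
    // Requires <filesystem> and <fmt/format.h>.
    std::filesystem::path ResolveShaderCachePath(u64 program_id,
                                                 GameListRemoveTarget target) {
        const auto title_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) /
                               fmt::format("{:016x}", program_id);
        switch (target) {
        case GameListRemoveTarget::GlShaderCache:
            return title_dir / "opengl.bin"; // OpenGL transferable cache
        case GameListRemoveTarget::VkShaderCache:
            return title_dir / "vulkan.bin"; // Vulkan transferable cache
        default:
            return title_dir; // AllShaderCache removes the whole directory
        }
    }

With this layout, removing all caches for a title reduces to a recursive delete of the per-title directory, which is what RemoveAllTransferableShaderCaches does below via Common::FS::RemoveDirRecursively.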
--- src/yuzu/game_list.cpp | 14 ++++++++-- src/yuzu/game_list.h | 4 ++- src/yuzu/main.cpp | 59 +++++++++++++++++++++++++++++++++--------- src/yuzu/main.h | 3 ++- 4 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 76c063c97..f746bd85d 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -520,9 +520,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QMenu* remove_menu = context_menu.addMenu(tr("Remove")); QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); - QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache")); QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); + QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); + QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); remove_menu->addSeparator(); + QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); @@ -540,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri open_transferable_shader_cache->setVisible(program_id != 0); remove_update->setVisible(program_id != 0); remove_dlc->setVisible(program_id != 0); + remove_gl_shader_cache->setVisible(program_id != 0); + remove_vk_shader_cache->setVisible(program_id != 0); remove_shader_cache->setVisible(program_id != 0); remove_all_content->setVisible(program_id != 0); auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); @@ -569,8 +573,14 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_dlc, &QAction::triggered, [this, program_id]() { emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); }); + connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path); + }); + connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() { + emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path); + }); connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { - emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path); + emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path); }); connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index c9a9f4654..10339dcca 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -41,7 +41,9 @@ enum class GameListOpenTarget { }; enum class GameListRemoveTarget { - ShaderCache, + GlShaderCache, + VkShaderCache, + AllShaderCache, CustomConfiguration, }; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 562dfa620..a5159a1ee 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1655,9 +1655,9 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target void GMainWindow::OnTransferableShaderCacheOpenFile(u64 
program_id) { const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)}; - if (!Common::FS::Exists(shader_cache_folder_path)) { + if (!Common::FS::CreateDirs(shader_cache_folder_path)) { QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), - tr("A shader cache for this title does not exist.")); + tr("Failed to create the shader cache directory for this title.")); return; } const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)}; @@ -1805,8 +1805,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ const std::string& game_path) { const QString question = [this, target] { switch (target) { - case GameListRemoveTarget::ShaderCache: - return tr("Delete Transferable Shader Cache?"); + case GameListRemoveTarget::GlShaderCache: + return tr("Delete OpenGL Transferable Shader Cache?"); + case GameListRemoveTarget::VkShaderCache: + return tr("Delete Vulkan Transferable Shader Cache?"); + case GameListRemoveTarget::AllShaderCache: + return tr("Delete All Transferable Shader Caches?"); case GameListRemoveTarget::CustomConfiguration: return tr("Remove Custom Game Configuration?"); default: @@ -1820,8 +1824,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } switch (target) { - case GameListRemoveTarget::ShaderCache: - RemoveTransferableShaderCache(program_id); + case GameListRemoveTarget::GlShaderCache: + case GameListRemoveTarget::VkShaderCache: + RemoveTransferableShaderCache(program_id, target); + break; + case GameListRemoveTarget::AllShaderCache: + RemoveAllTransferableShaderCaches(program_id); break; case GameListRemoveTarget::CustomConfiguration: RemoveCustomConfiguration(program_id, game_path); @@ -1829,18 +1837,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ } } -void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { +void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) { + const auto target_file_name = [target] { + switch (target) { + case GameListRemoveTarget::GlShaderCache: + return "opengl.bin"; + case GameListRemoveTarget::VkShaderCache: + return "vulkan.bin"; + default: + return ""; + } + }(); const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); - const auto transferable_shader_cache_file_path = - shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id); + const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); + const auto target_file = shader_cache_folder_path / target_file_name; - if (!Common::FS::Exists(transferable_shader_cache_file_path)) { + if (!Common::FS::Exists(target_file)) { QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), tr("A shader cache for this title does not exist.")); return; } - - if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) { + if (Common::FS::RemoveFile(target_file)) { QMessageBox::information(this, tr("Successfully Removed"), tr("Successfully removed the transferable shader cache.")); } else { @@ -1849,6 +1866,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { } } +void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { + const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); + const auto program_shader_cache_dir =
shader_cache_dir / fmt::format("{:016x}", program_id); + + if (!Common::FS::Exists(program_shader_cache_dir)) { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("A shader cache for this title does not exist.")); + return; + } + if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) { + QMessageBox::information(this, tr("Successfully Removed"), + tr("Successfully removed the transferable shader caches.")); + } else { + QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), + tr("Failed to remove the transferable shader cache directory.")); + } +} + void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); const auto config_file_name = diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a50e5b9fe..3eb6aed56 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -282,7 +282,8 @@ private: void RemoveBaseContent(u64 program_id, const QString& entry_type); void RemoveUpdateContent(u64 program_id, const QString& entry_type); void RemoveAddOnContent(u64 program_id, const QString& entry_type); - void RemoveTransferableShaderCache(u64 program_id); + void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); + void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); std::optional SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); InstallResult InstallNSPXCI(const QString& filename); From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: [PATCH 752/770] shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- .../backend/glasm/emit_context.h | 2 +- src/shader_recompiler/backend/glasm/reg_alloc.h | 3 +-- .../backend/glsl/emit_glsl_floating_point.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 15 ++++++++++----- src/shader_recompiler/frontend/ir/opcodes.h | 3 ++- .../frontend/maxwell/control_flow.h | 1 - .../frontend/maxwell/structured_control_flow.cpp | 9 ++------- .../impl/atomic_operations_global_memory.cpp | 12 ++++++------ .../impl/integer_floating_point_conversion.cpp | 4 +++- .../translate/impl/load_store_attribute.cpp | 12 ++++++------ .../translate/impl/surface_atomic_operations.cpp | 3 --- .../maxwell/translate/impl/surface_load_store.cpp | 8 ++++---- .../global_memory_to_storage_buffer_pass.cpp | 6 +++--- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +--- 14 files changed, 40 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 1da51a996..8433e5c00 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -59,7 +59,7 @@ public: } std::string code; - RegAlloc reg_alloc{*this}; + RegAlloc reg_alloc{}; const Info& info; const Profile& profile; const RuntimeInfo& runtime_info; diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 5a703daf2..82aec66c6 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -86,7 +86,7 @@ struct ScalarF64 : Value {}; 
class RegAlloc { public: - RegAlloc(EmitContext& ctx_) : ctx{ctx_} {} + RegAlloc() = default; Register Define(IR::Inst& inst); @@ -142,7 +142,6 @@ private: void Free(Id id); - EmitContext& ctx; size_t num_used_registers{}; size_t num_used_long_registers{}; std::bitset register_use{}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index b11be5bd7..2edcf592e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -22,7 +22,7 @@ void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string } bool IsPrecise(const IR::Inst& inst) { - return {inst.Flags().no_contraction}; + return inst.Flags().no_contraction; } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 647804814..3588f052b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -109,7 +109,7 @@ private: return; } if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(offset.U32())); + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast(offset.U32()))); return; } IR::Inst* const inst{offset.InstRecursive()}; @@ -117,16 +117,21 @@ private: switch (inst->GetOpcode()) { case IR::Opcode::CompositeConstructU32x2: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()))); return; case IR::Opcode::CompositeConstructU32x3: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()))); return; case IR::Opcode::CompositeConstructU32x4: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32(), - inst->Arg(3).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()), + static_cast(inst->Arg(3).U32()))); return; default: break; diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 56b001902..9ab108292 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -67,7 +67,8 @@ constexpr OpcodeMeta META_TABLE[]{ }; constexpr size_t CalculateNumArgsOf(Opcode op) { const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); + return static_cast( + std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))); } constexpr u8 NUM_ARGS[]{ diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 0e515c3b6..a6bd3e196 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -161,7 +161,6 @@ private: Environment& env; ObjectPool& block_pool; boost::container::small_vector functions; - FunctionId current_function_id{0}; Location program_start; bool exits_to_dispatcher{}; Block* dispatch_block{}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp 
b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 06fde0017..221454b99 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -313,9 +313,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { class GotoPass { public: - explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, - ObjectPool& block_pool_, ObjectPool& stmt_pool) - : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); @@ -616,8 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& inst_pool; - ObjectPool& block_pool; ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -864,7 +860,6 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - u32 loop_id{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -878,7 +873,7 @@ private: IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; - GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 66f39e44e..d9f999e05 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -59,14 +59,14 @@ IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, AtomSize size) { static constexpr IR::FpControl f16_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::DontCare}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, }; static constexpr IR::FpControl f32_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::FTZ}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, }; switch (op) { case AtomOp::ADD: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e0e157275..0b8119ddd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -104,7 +104,9 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { .rounding = CastFpRounding(i2f.fp_rounding), .fmz_mode = IR::FmzMode::DontCare, }; - auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; + auto value{v.ir.ConvertIToF(static_cast(dst_bitsize), + static_cast(conversion_src_bitsize), is_signed, src, + fp_control)}; if 
(i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 7d7dcc3cb..924fb7a40 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -80,10 +80,10 @@ void TranslatorVisitor::ALD(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ald.patch != 0) { const IR::Patch patch{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetPatch(patch)); + F(ald.dest_reg + static_cast(element), ir.GetPatch(patch)); } else { const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttribute(attr, vertex)); } } return; @@ -92,7 +92,7 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Indirect patch read"); } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -121,10 +121,10 @@ void TranslatorVisitor::AST(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ast.patch != 0) { const IR::Patch patch{offset / 4 + element}; - ir.SetPatch(patch, F(ast.src_reg + element)); + ir.SetPatch(patch, F(ast.src_reg + static_cast(element))); } else { const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + ir.SetAttribute(attr, F(ast.src_reg + static_cast(element)), vertex); } } return; @@ -133,7 +133,7 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Indexed tessellation patch store"); } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast(element)), vertex); }); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 44144f154..63b588ad4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -69,9 +69,6 @@ TextureType GetType(Type type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { - const auto array{[&](int index) { - return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); - }}; switch (type) { case Type::_1D: case Type::BUFFER_1D: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 7dc793ad7..681220a8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -160,10 +160,10 @@ unsigned SwizzleMask(u64 swizzle) { IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { std::array colors; for (int i = 0; i < num_regs; ++i) { - colors[i] = ir.GetReg(reg + i); + colors[static_cast(i)] = ir.GetReg(reg + i); } for 
(int i = num_regs; i < 4; ++i) { - colors[i] = ir.Imm32(0); + colors[static_cast(i)] = ir.Imm32(0); } return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); } @@ -211,12 +211,12 @@ void TranslatorVisitor::SULD(u64 insn) { if (is_typed) { const int num_regs{SizeInRegs(suld.size)}; for (int i = 0; i < num_regs; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } else { const unsigned mask{SwizzleMask(suld.swizzle)}; const int bits{std::popcount(mask)}; - if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast(bits))) { throw NotImplementedException("Unaligned destination register"); } for (unsigned component = 0; component < 4; ++component) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 70449eeca..f9de17b25 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -314,8 +314,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = offset.U32(), }; if (!Common::IsAligned(storage_buffer.offset, 16)) { // The SSBO pointer has to be aligned @@ -484,7 +484,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index = storage_buffer.index, .cbuf_offset = storage_buffer.offset, .count = 1, - .is_written{info.writes.contains(storage_buffer)}, + .is_written = info.writes.contains(storage_buffer), }); } for (const StorageInst& storage_inst : info.to_replace) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 622267147..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -104,9 +104,7 @@ public: template static auto MakeConfigureSpecFunc() { - return [](GraphicsPipeline* pipeline, bool is_indexed) { - pipeline->ConfigureImpl(is_indexed); - }; + return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl(is_indexed); }; } private: From 8c166c68d46d160162caa9b588f1e762c57e52f4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 23:26:13 -0300 Subject: [PATCH 753/770] gl_shader_cache: Properly implement asynchronous shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 58a4f0fb4..24f035c37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -318,7 +318,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { SetXfbState(graphics_key.xfb_state, regs); } if (current_pipeline && graphics_key == current_pipeline->Key()) { - return current_pipeline->IsBuilt() ? 
current_pipeline : nullptr; + return BuiltPipeline(current_pipeline); } return CurrentGraphicsPipelineSlowPath(); } From 94af0a00f67c9f28fcaf170458e55b7a95de76bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 02:03:25 -0400 Subject: [PATCH 754/770] glsl: Clamp shared mem size to GL_MAX_COMPUTE_SHARED_MEMORY_SIZE --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 11 +++++++++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index ffdc6dbba..c5e819a0a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -218,8 +218,15 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.shared_memory_size > 0) { - ctx.header += - fmt::format("shared uint smem[{}];", Common::DivCeil(program.shared_memory_size, 4U)); + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); } ctx.header += "void main(){\n"; if (program.local_memory_size > 0) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 501dcaf71..f0c3b3b17 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -67,6 +67,8 @@ struct Profile { bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; + + u32 gl_max_compute_smem_size{}; }; } // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 24f035c37..7ecafc862 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,6 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ .support_float16 = false, From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: [PATCH 755/770] shader: Avoid usage of C++20 ranges to build in clang --- .../backend/glasm/emit_glasm.cpp | 7 +++++-- .../backend/glsl/emit_glsl.cpp | 9 +++++++-- .../frontend/maxwell/control_flow.cpp | 13 ++++++------ .../maxwell/structured_control_flow.cpp | 8 ++++---- .../frontend/maxwell/translate_program.cpp | 20 ++++++++++++------- .../ir_opt/constant_propagation_pass.cpp | 5 +++-- .../ir_opt/dead_code_elimination_pass.cpp | 2 -- .../ir_opt/dual_vertex_pass.cpp | 6 ------ .../global_memory_to_storage_buffer_pass.cpp | 1 - .../ir_opt/lower_int64_to_int32.cpp | 5 +++-- .../ir_opt/ssa_rewrite_pass.cpp | 10 ++++++---- 11 files changed, 
47 insertions(+), 39 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 64787b353..a5e8c9b6e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include @@ -196,7 +196,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index c5e819a0a..8a430d573 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include +#include +#include #include "common/div_ceil.h" #include "common/settings.h" @@ -120,7 +122,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e7abea82f..1a954a509 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -151,18 +150,18 @@ std::pair Stack::Pop(Token token) const { } std::optional Stack::Peek(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - if (it == reverse_entries.end()) { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { return std::nullopt; } return it->target; } Stack Stack::Remove(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - const auto pos{std::distance(reverse_entries.begin(), it)}; + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; Stack result; result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); return result; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 221454b99..8b3e0a15c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -167,7 +166,7 @@ std::string DumpExpr(const Statement* stmt) { } } -std::string DumpTree(const Tree& tree, u32 indentation = 0) { +[[maybe_unused]] std::string 
DumpTree(const Tree& tree, u32 indentation = 0) { std::string ret; std::string indent(indentation, ' '); for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { @@ -315,8 +314,9 @@ class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 83c77967d..c067d459c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include "common/settings.h" @@ -20,12 +19,19 @@ namespace Shader::Maxwell { namespace { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } return blocks; } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3c72203ad..8dd6d6c2c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include -#include #include #include @@ -599,7 +598,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 1e4a3fdae..400836301 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include - #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index 3d2c205c2..055ba9c54 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -2,12 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include - -#include "common/bit_cast.h" -#include "common/bit_util.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f9de17b25..4197b0095 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index abf7c87c7..e80d3d1d9 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include #include "shader_recompiler/exception.h" @@ -207,7 +206,9 @@ void Lower(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void LowerInt64ToInt32(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { Lower(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index dcaced83f..53145fb5e 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,6 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // -#include #include #include #include @@ -243,7 +242,9 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& [variant, phi] : it->second) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -373,8 +374,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { - VisitBlock(pass, block); + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); } } From fc7bed21b539aac4fdde74a41217066eaf8ed3f9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 19:56:14 -0400 Subject: [PATCH 756/770] shader: Implement ISETP.X --- .../maxwell/translate/impl/common_funcs.cpp | 43 +++++++++++++++++++ .../maxwell/translate/impl/common_funcs.h | 4 ++ 
.../impl/integer_compare_and_set.cpp | 43 ------------------- .../translate/impl/integer_set_predicate.cpp | 11 ++++- 4 files changed, 57 insertions(+), 44 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 10bb01d99..20458d2ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -29,6 +29,49 @@ IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32 } } +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f584060b3..214d0af3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -11,6 +11,10 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed); +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); diff --git 
a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 34fa7345c..8ce1aee04 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,49 +9,6 @@ namespace Shader::Maxwell { namespace { -IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, - CompareOp compare_op, bool is_signed) { - const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; - const IR::U1 z_flag{ir.GetZFlag()}; - const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; - const IR::U1 flip_logic{is_signed ? ir.Imm1(false) - : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), - ir.ILessThan(operand_2, zero, true))}; - switch (compare_op) { - case CompareOp::False: - return ir.Imm1(false); - case CompareOp::LessThan: - return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - case CompareOp::Equal: - return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); - case CompareOp::LessThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::GreaterThan: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), - ir.IGreaterThan(intermediate, zero, true))}; - const IR::U1 not_z{ir.LogicalNot(z_flag)}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); - } - case CompareOp::NotEqual: - return ir.LogicalOr(ir.INotEqual(intermediate, zero), - ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); - case CompareOp::GreaterThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), - ir.IGreaterThanEqual(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::True: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed, bool x) { return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 7743701d0..bee10e5b9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -9,6 +9,12 @@ namespace Shader::Maxwell { namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? 
ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -17,15 +23,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; const BooleanOp bop{isetp.bop}; const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; From e1ed218b418cd1ed94f6f25ccd0db86b63bd6bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Jul 2021 03:48:30 -0300 Subject: [PATCH 757/770] renderer_opengl: Use ARB_separate_shader_objects Ensures that states set for a particular stage are not attached to other stages which may not need them. --- .../renderer_opengl/gl_compute_pipeline.cpp | 10 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 62 +++++------ .../renderer_opengl/gl_graphics_pipeline.h | 2 +- .../renderer_opengl/gl_shader_manager.h | 100 ++++++++++++++---- .../renderer_opengl/gl_shader_util.cpp | 57 +++++----- .../renderer_opengl/gl_shader_util.h | 6 +- .../renderer_opengl/renderer_opengl.cpp | 11 +- .../renderer_opengl/renderer_opengl.h | 3 +- .../renderer_opengl/util_shaders.cpp | 19 ++-- 9 files changed, 154 insertions(+), 116 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index c63e87a56..aa1cc592f 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); + source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; case Settings::ShaderBackend::GLASM: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; case Settings::ShaderBackend::SPIRV: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); - LinkProgram(source_program.handle); + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), @@ -154,7 +150,7 @@ void ComputePipeline::Configure() { if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { - program_manager.BindProgram(source_program.handle); + program_manager.BindComputeProgram(source_program.handle); } buffer_cache.UnbindComputeTextureBuffers(); 
size_t texbuf_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 1f19b5825..c8b2d833d 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,44 +237,32 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{ - [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; - case Settings::ShaderBackend::GLASM: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } break; - case Settings::ShaderBackend::SPIRV: { - const auto code{sources_spirv[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; + auto func{[this, device, sources, sources_spirv, + shader_notify](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { @@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { - program_manager.BindProgram(program.handle); + program_manager.BindSourcePrograms(source_programs); } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5f5d57385..5e34b9537 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -129,7 +129,7 @@ private: void (*configure_func)(GraphicsPipeline*, bool){}; - OGLProgram program; + std::array source_programs; std::array assembly_programs; u32 enabled_stages_mask{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 88b734bcb..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,34 +24,68 @@ class ProgramManager { public: explicit ProgramManager(const Device& device) { + 
glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); } } - void BindProgram(GLuint program) { - if (current_source_program == program) { - return; - } - current_source_program = program; + void BindComputeProgram(GLuint program) { glUseProgram(program); + is_compute_bound = true; } void BindComputeAssemblyProgram(GLuint program) { - if (current_compute_assembly_program != program) { - current_compute_assembly_program = program; + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); + UnbindPipeline(); + } + + void BindSourcePrograms(std::span programs) { + static constexpr std::array stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } + } + BindPipeline(); } void BindAssemblyPrograms(std::span programs, u32 stage_mask) { - const u32 changed_mask = current_assembly_mask ^ stage_mask; - current_assembly_mask = stage_mask; + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; if (changed_mask != 0) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { @@ -65,25 +99,47 @@ public: } } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_assembly_programs[stage] != programs[stage].handle) { - current_assembly_programs[stage] = programs[stage].handle; + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); } } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); - } + UnbindPipeline(); } void RestoreGuestCompute() {} private: - GLuint current_source_program = 0; + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } - u32 current_assembly_mask = 0; - std::array current_assembly_programs{}; - GLuint current_compute_assembly_program = 0; + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; + + u32 
current_stage_mask = 0; + std::array current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5109985f1..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,6 +13,33 @@ namespace OpenGL { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); + glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } + return program; +} + static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); @@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) { } } -void AttachShader(GLenum stage, GLuint program, std::string_view code) { +OGLProgram CreateProgram(std::string_view code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); @@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { const GLchar* const code_ptr = code.data(); glShaderSource(shader.handle, 1, &code_ptr, &length); glCompileShader(shader.handle); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle, code); } + return LinkSeparableProgram(shader.handle); } -void AttachShader(GLenum stage, GLuint program, std::span code) { +OGLProgram CreateProgram(std::span code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), static_cast(code.size_bytes())); glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle); } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } + return LinkSeparableProgram(shader.handle); } OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index ff5aa024f..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -17,11 +17,9 @@ namespace OpenGL { -void AttachShader(GLenum stage, GLuint program, 
std::string_view code); +OGLProgram CreateProgram(std::string_view code, GLenum stage); -void AttachShader(GLenum stage, GLuint program, std::span code); - -void LinkProgram(GLuint program); +OGLProgram CreateProgram(std::span code, GLenum stage); OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c9cfa6366..d15167e19 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -251,10 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - present_program.handle = glCreateProgram(); - AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); - AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); - LinkProgram(present_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -340,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - program_manager.BindProgram(present_program.handle); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b3ee55665..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -110,7 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram present_program; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8aa0683c8..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock; namespace { OGLProgram MakeProgram(std::string_view source) { - OGLProgram program; - OGLShader shader; - program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, program.handle, source); - LinkProgram(program.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { @@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindProgram(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, 
swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -220,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindProgram(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span Date: Thu, 15 Jul 2021 18:37:24 -0400 Subject: [PATCH 758/770] glsl: Update TessellationControl gl_in Adheres to GL_ARB_separate_shader_objects requirements --- .../backend/glsl/emit_context.cpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e08d2d2eb..4e6f2c0fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -257,6 +257,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { } } +void SetupInPerVertex(EmitContext& ctx, std::string& header) { + // Currently only required for TessellationControl to adhere to + // ARB_separate_shader_objects requirements + if (ctx.stage != Stage::TessellationControl) { + return; + } + const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; + const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; + const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; + const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; + if (!loads_per_vertex) { + return; + } + header += "in gl_PerVertex{"; + if (loads_position) { + header += "vec4 gl_Position;"; + } + if (loads_point_size) { + header += "float gl_PointSize;"; + } + if (loads_clip_distance) { + header += "float gl_ClipDistance[];"; + } + header += "}gl_in[gl_MaxPatchVertices];"; +} + void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { if (!ctx.info.loads.Legacy()) { return; @@ -334,6 +360,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } SetupOutPerVertex(*this, header); + SetupInPerVertex(*this, header); 
SetupLegacyInPerFragment(*this, header); for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { @@ -375,6 +402,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } void EmitContext::SetupExtensions() { + header += "#extension GL_ARB_separate_shader_objects : enable\n"; if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } From 56c30dd9e0fec5275ed4d4b4c63bb35049dc000d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 16 Jul 2021 20:13:15 -0400 Subject: [PATCH 759/770] glsl: Simplify FCMP emission --- .../backend/glsl/emit_glsl_floating_point.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index 2edcf592e..d423bfb1b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -13,12 +13,10 @@ namespace Shader::Backend::GLSL { namespace { void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, std::string_view op, bool ordered) { - ctx.AddU1("{}={}{}{}", inst, lhs, op, rhs, lhs, rhs); - if (ordered) { - ctx.Add("&&!isnan({})&&!isnan({});", lhs, rhs); - } else { - ctx.Add("||isnan({})||isnan({});", lhs, rhs); - } + const auto nan_op{ordered ? "&&!" : "||"}; + ctx.AddU1("{}={}{}{}" + "{}isnan({}){}isnan({});", + inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); } bool IsPrecise(const IR::Inst& inst) { From c9528282d93c6542444a1f35a83eb8101ece157a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Jul 2021 00:59:57 -0400 Subject: [PATCH 760/770] gl_device: Simplify GLASM setting logic --- src/video_core/renderer_opengl/gl_device.cpp | 23 +++++++------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 563b291cd..6afe6c1e1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,16 +172,14 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = - Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && - GLAD_GL_NV_transform_feedback2; - - shader_backend = (Settings::values.shader_backend.GetValue() == - Settings::ShaderBackend::GLASM) == use_assembly_shaders - ? Settings::values.shader_backend.GetValue() - : Settings::ShaderBackend::GLSL; - + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && + GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Completely disable async shaders for now, as it causes graphical glitches use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
@@ -194,11 +192,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } From 56478bc9ac5a01ca5c73ba72faae1a5eaae0f8cb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Jul 2021 16:16:23 -0400 Subject: [PATCH 761/770] shader: Fix disabled attribute default values --- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 756de0a27..fb8c02a77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -300,7 +300,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const std::optional type{AttrTypes(ctx, index)}; if (!type) { // Attribute is disabled - return ctx.Const(0.0f); + return ctx.Const(element == 3 ? 1.0f : 0.0f); } if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { // Varying component is not written diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d15167e19..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -145,7 +145,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, GLint max_attribs{}; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); for (GLint attrib = 0; attrib < max_attribs; ++attrib) { - glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); } // Enable seamless cubemaps when per texture parameters are not available if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { From 4a82450c8139ee751f23f2d50bec6e748e7c9637 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 20:56:50 -0300 Subject: [PATCH 762/770] cmake: Remove shader cache version --- src/common/CMakeLists.txt | 10 +--------- src/common/scm_rev.cpp.in | 2 -- src/core/reporter.cpp | 1 - 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c92266a17..3e34c6c2d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -1,8 +1,3 @@ -# Add a custom command to generate a new shader_cache_version hash when any of the following files change -# NOTE: This is an approximation of what files affect shader generation, its possible something else -# could affect the result, but much more unlikely than the following files. 
Keeping a list of files -like this allows for much better caching since it doesn't force the user to recompile binary shaders every update -set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") if (DEFINED ENV{AZURECIREPO}) set(BUILD_REPOSITORY $ENV{AZURECIREPO}) endif() @@ -30,10 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp -DGIT_EXECUTABLE=${GIT_EXECUTABLE} -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake DEPENDS - # WARNING! It was too much work to try and make a common location for this list, - # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - # ... - # and also check that the scm_rev files haven't changed + # Check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" # technically we should regenerate if the git version changed, but its not worth the effort imo diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 5f126f324..cc88994c6 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -14,7 +14,6 @@ #define BUILD_ID "@BUILD_ID@" #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" -#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" namespace Common { @@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION; const char g_build_id[] = BUILD_ID; const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; -const char g_shader_cache_version[] = SHADER_CACHE_VERSION; } // namespace diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index cfaf50105..365b8f906 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -62,7 +62,6 @@ json GetYuzuVersionData() { {"build_date", std::string(Common::g_build_date)}, {"build_fullname", std::string(Common::g_build_fullname)}, {"build_version", std::string(Common::g_build_version)}, - {"shader_cache_version", std::string(Common::g_shader_cache_version)}, }; } From 258f35515d61d01049d2e433146cab808837bb7d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 21:07:12 -0300 Subject: [PATCH 763/770] shader_environment: Receive cache version from outside This allows us to invalidate the OpenGL and Vulkan caches separately in the future.
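In practice, the load path now compares the on-disk header against the version the caller passes in. A minimal sketch of that check, assuming the eight-byte magic number defined in shader_environment.cpp (ValidateHeader is a hypothetical helper; error handling and stale-file cleanup are elided):

#include <array>
#include <cstdint>
#include <fstream>

using u32 = std::uint32_t;

// Same constant as in shader_environment.cpp
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};

// Returns true when the file header matches the version the backend expects.
bool ValidateHeader(std::ifstream& file, u32 expected_cache_version) {
    std::array<char, 8> magic{};
    u32 version{};
    file.read(magic.data(), magic.size());
    file.read(reinterpret_cast<char*>(&version), sizeof(version));
    return file && magic == MAGIC_NUMBER && version == expected_cache_version;
}

With each renderer owning its constant (CACHE_VERSION = 5 in both caches as of this patch), bumping one backend's version no longer throws away the other backend's pipelines.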
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 +++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 ++++++--- src/video_core/shader_environment.cpp | 11 +++++------ src/video_core/shader_environment.h | 9 +++++---- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ecafc862..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -48,9 +48,12 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -287,7 +290,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -394,7 +397,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); return pipeline; } @@ -492,7 +495,8 @@ std::unique_ptr ShaderCache::CreateComputePipeline( if (!pipeline || shader_cache_filename.empty()) { return pipeline; } - SerializePipeline(key, std::array{&env}, shader_cache_filename); + SerializePipeline(key, std::array{&env}, shader_cache_filename, + CACHE_VERSION); return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39db35175..2ce8b4156 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,6 +54,8 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -434,7 +436,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -562,7 +565,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } @@ -581,7 +584,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } serialization_thread.QueueWork([this, key, env = std::move(env)] { SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } diff --git 
a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 429cab30d..8a4581c19 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,6 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -370,7 +369,7 @@ std::array FileEnvironment::WorkgroupSize() const { } void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename) try { + const std::filesystem::path& filename, u32 cache_version) try { std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); file.exceptions(std::ifstream::failbit); if (!file.is_open()) { @@ -381,7 +380,7 @@ void SerializePipeline(std::span key, std::span(&CACHE_VERSION), sizeof(CACHE_VERSION)); + .write(reinterpret_cast(&cache_version), sizeof(cache_version)); } if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { return; @@ -402,7 +401,7 @@ void SerializePipeline(std::span key, std::span load_compute, Common::UniqueFunction> load_graphics) try { std::ifstream file(filename, std::ios::binary | std::ios::ate); @@ -417,13 +416,13 @@ void LoadPipelines( u32 cache_version; file.read(magic_number.data(), magic_number.size()) .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { file.close(); if (Common::FS::RemoveFile(filename)) { if (magic_number != MAGIC_NUMBER) { LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); } - if (cache_version != CACHE_VERSION) { + if (cache_version != expected_cache_version) { LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); } } else { diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index d26dbfaab..2079979db 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -164,18 +164,19 @@ private: }; void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename); + const std::filesystem::path& filename, u32 cache_version); template -void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { static_assert(std::is_trivially_copyable_v); static_assert(std::has_unique_object_representations_v); SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), - std::span(envs.data(), envs.size()), filename); + std::span(envs.data(), envs.size()), filename, cache_version); } void LoadPipelines( - std::stop_token stop_loading, const std::filesystem::path& filename, + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, Common::UniqueFunction load_compute, Common::UniqueFunction> load_graphics); From 8381490a04f4618ec5be90904815b409e3f4ca59 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:05:41 -0300 Subject: [PATCH 764/770] opengl: Fix asynchronous shaders Wait for shader to build before configuring it, and wait for the shader to build before sharing it with other contexts. 
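The wait added here is a standard condition-variable gate over the existing atomic flag. A standalone sketch of the pattern (BuildGate is a hypothetical name; GraphicsPipeline embeds the same three members directly):

#include <atomic>
#include <condition_variable>
#include <mutex>

class BuildGate {
public:
    // Worker thread: signal that the program objects have finished linking.
    void MarkBuilt() {
        {
            // Storing under the lock avoids a missed wakeup when a waiter sits
            // between its predicate check and the actual sleep.
            std::scoped_lock lock{built_mutex};
            is_built.store(true, std::memory_order_relaxed);
        }
        built_condvar.notify_all();
    }

    // Render thread: block until the worker has finished building.
    void WaitForBuild() {
        std::unique_lock lock{built_mutex};
        built_condvar.wait(lock, [this] { return is_built.load(std::memory_order_relaxed); });
    }

private:
    std::mutex built_mutex;
    std::condition_variable built_condvar;
    std::atomic_bool is_built{false};
};

The common case stays lock-free: ConfigureImpl only takes the slow WaitForBuild path when is_built still reads false.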
--- .../renderer_opengl/gl_graphics_pipeline.cpp | 30 +++++++++++++++++-- .../renderer_opengl/gl_graphics_pipeline.h | 7 ++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c8b2d833d..fac0034fb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, sources_spirv, - shader_notify](ShaderContext::Context*) mutable { + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { + switch (backend) { case Settings::ShaderBackend::GLSL: if (!sources[stage].empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); @@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } } break; case Settings::ShaderBackend::SPIRV: @@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline( break; } } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } if (shader_notify) { shader_notify->MarkShaderComplete(); } is_built = true; + built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { @@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5e34b9537..4e28d9a42 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -119,6 +119,8 @@ private: void GenerateTransformFeedbackState(); + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -143,13 +145,16 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; - std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; 
GLsizei num_xfb_strides{}; std::array xfb_attribs{}; std::array xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; }; } // namespace OpenGL From 3c6d440015d7ffb81eedbfcd7ee1aab1ea87ee2a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:08:06 -0300 Subject: [PATCH 765/770] Revert "renderers: Disable async shader compilation" This reverts commit 4a152767286717fa69bfc94846a124a366f70065. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6afe6c1e1..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -180,11 +180,9 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); shader_backend = Settings::ShaderBackend::GLSL; } - // Completely disable async shaders for now, as it causes graphical glitches - use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. - // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - // !(is_amd || (is_intel && !is_linux)); + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; From a741513e65f09f85dba7a092fb80f2a872ac3397 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:09:21 -0300 Subject: [PATCH 766/770] qt: Remove "experimental" from asynchronous shader building UI --- src/yuzu/configuration/configure_graphics_advanced.ui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 772e5fed3..379dc5d2e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@ Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental. 
- Use asynchronous shader building (experimental) + Use asynchronous shader building From 594ea29015c8fe8ebd93acad0aa3572799c3a896 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:13:51 -0300 Subject: [PATCH 767/770] cmake: Remove unused code in GenerateSCMRev.cmake Remove shader code hash generation code as it's no longer used. --- CMakeModules/GenerateSCMRev.cmake | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 77358768e..43ca730ec 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -48,15 +48,6 @@ if (BUILD_REPOSITORY) endif() endif() -# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) -set(VIDEO_CORE "${SRC_DIR}/src/video_core") -set(HASH_FILES - # ... -) -set(COMBINED "") -foreach (F IN LISTS HASH_FILES) - file(READ ${F} TMP) - set(COMBINED "${COMBINED}${TMP}") -endforeach() -string(MD5 SHADER_CACHE_VERSION "${COMBINED}") +# The variable SRC_DIR must be passed into the script +# (since it uses the current build directory for all values of CMAKE_*_DIR) configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY) From f6796cad9c4259aa13aab1f8b2e27392e07432b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:51:10 -0300 Subject: [PATCH 768/770] vulkan_device: Blacklist Volta and older from VK_KHR_push_descriptor Causes crashes on Link's Awakening intro. It's hard to debug if it's our fault due to bugs in validation layers. --- .../vulkan_common/vulkan_device.cpp | 43 +++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 13d938434..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -34,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -198,13 +204,34 @@ std::unordered_map GetFormatProperties(vk::Physica std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector supported_extensions(std::size(extensions)); + std::vector supported_extensions; + supported_extensions.reserve(extensions.size()); for (const auto& extension : extensions) { supported_extensions.emplace_back(extension.extensionName); } return supported_extensions; } +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return 
NvidiaArchitecture::AmpereOrNewer; } } if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { return NvidiaArchitecture::Turing; } return NvidiaArchitecture::VoltaOrOlder; } } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, @@ -522,11 +549,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { - if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != - supported_extensions.end()) { + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; } } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { From a55ff22900c5261915eb8b88f2c0f18a4eb6f30f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:52:29 -0300 Subject: [PATCH 769/770] vulkan/blit_image: Commit descriptor sets within worker thread Fixes a race condition. The descriptor pool is not thread safe, so we have to commit descriptor sets within the same thread. --- src/video_core/renderer_vulkan/blit_image.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 4058f62cd..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -376,11 +376,11 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ?
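The fix, sketched in plain C++ ahead of the diff itself: rather than committing from the pool on the submitting thread and capturing the handle, capture the owner and commit inside the deferred task, so every pool access runs on the single worker that drains the queue. The queue and pool types here are hypothetical stand-ins for yuzu's scheduler and descriptor allocator:

    #include <cstdio>
    #include <functional>
    #include <queue>
    #include <thread>

    // Stand-in for a descriptor pool: assume it may only be touched by one thread.
    struct DescriptorPool {
        int next = 0;
        int Commit() { return next++; } // not thread safe
    };

    int main() {
        DescriptorPool pool;
        std::queue<std::function<void()>> tasks;

        // Racy: committing here would run on the producer thread.
        //   const int set = pool.Commit();
        //   tasks.push([set] { /* use set */ });

        // Safe: defer the commit into the task so it executes on the worker.
        for (int i = 0; i < 3; ++i) {
            tasks.push([&pool] {
                const int set = pool.Commit(); // runs on the worker thread
                std::printf("recording with descriptor set %d\n", set);
            });
        }

        // A single worker drains the queue, serializing all pool access.
        std::thread worker([&tasks] {
            while (!tasks.empty()) {
                tasks.front()();
                tasks.pop();
            }
        });
        worker.join();
    }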
*linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -402,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -448,14 +447,12 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -476,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers From 7f13104c1778cfdfd54350e92603164070781124 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 18:28:36 -0300 Subject: [PATCH 770/770] shader: Support out of bound local memory reads and immediate writes Support ignoring out-of-bounds writes with immediate offsets. Writing dynamically out of bounds (e.g. R0+0x4) is not yet supported. Reading out of bounds yields zero. This is implemented by checking the offset against the local memory size in the IR; when the offset is an immediate, the optimization passes fold the check away.
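As a behavioral sketch of those semantics in plain C++ (a hypothetical model of the emitted checks, not the recompiler's IR): reads compare the byte offset against the local memory size and select zero when out of bounds, while stores whose offset is known at compile time are dropped with a warning when they fall outside the buffer:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct LocalMemory {
        std::vector<uint32_t> words; // local memory as 32-bit words

        uint32_t SizeBytes() const { return static_cast<uint32_t>(words.size()) * 4; }

        // Read: out-of-bounds offsets yield zero, mirroring the IR-level
        // Select(offset < size, LoadLocal(word_offset), 0).
        uint32_t Load(uint32_t byte_offset) const {
            return byte_offset < SizeBytes() ? words[byte_offset / 4] : 0;
        }

        // Write with a compile-time-known offset: statically out-of-bounds
        // stores are dropped, as STL now does for immediates.
        void StoreImmediate(uint32_t byte_offset, uint32_t value) {
            if (byte_offset >= SizeBytes()) {
                std::printf("dropping store at 0x%x (size 0x%x)\n", byte_offset, SizeBytes());
                return;
            }
            words[byte_offset / 4] = value;
        }
    };

    int main() {
        LocalMemory lmem{std::vector<uint32_t>(4, 0xdeadbeef)}; // 16 bytes
        std::printf("in bounds:     0x%x\n", lmem.Load(0x0));   // 0xdeadbeef
        std::printf("out of bounds: 0x%x\n", lmem.Load(0x40));  // 0
        lmem.StoreImmediate(0x40, 1);                           // dropped
    }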
--- .../impl/load_store_local_shared.cpp | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 20df163f2..d2a1dbf61 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -85,21 +85,28 @@ IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); } + +IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { case 8: { const IR::U32 bit{ByteOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); break; } case 16: { const IR::U32 bit{ShortOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); break; } case 32: @@ -108,9 +115,11 @@ void TranslatorVisitor::LDL(u64 insn) { if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } - X(dest, ir.LoadLocal(word_offset)); + X(dest, word); for (int i = 1; i < bit_size / 32; ++i) { - X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); } break; } @@ -141,6 +150,14 @@ void TranslatorVisitor::LDS(u64 insn) { void TranslatorVisitor::STL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first};
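A closing note on the sub-word paths in LDL above: the bit position of the addressed byte or halfword inside its 32-bit word comes from the low offset bits, with ShortOffset computing (offset << 3) & 16. A worked C++ sketch of that extraction follows; the & 24 byte mask and the sign-extension behavior are assumptions, since ByteOffset's body and BitFieldExtract's implementation sit outside this diff:

    #include <cstdint>
    #include <cstdio>

    // Bit position of the addressed byte/halfword inside its 32-bit word.
    uint32_t ByteBit(uint32_t offset) { return (offset << 3) & 24; }  // 0, 8, 16 or 24
    uint32_t ShortBit(uint32_t offset) { return (offset << 3) & 16; } // 0 or 16

    // Model of ir.BitFieldExtract(word, bit, count, is_signed) for count < 32.
    uint32_t BitFieldExtract(uint32_t word, uint32_t bit, uint32_t count, bool is_signed) {
        const uint32_t mask = (1u << count) - 1;
        const uint32_t raw = (word >> bit) & mask;
        if (is_signed && (raw & (1u << (count - 1))) != 0) {
            return raw | ~mask; // sign-extend the top bit
        }
        return raw;
    }

    int main() {
        const uint32_t word = 0xcafebabe;
        // 16-bit load at byte offset 2: bit = 16, extracts 0xcafe.
        std::printf("0x%04x\n", BitFieldExtract(word, ShortBit(2), 16, false));
        // Signed 8-bit load at byte offset 3: bit = 24, extracts 0xca sign-extended.
        std::printf("0x%08x\n", BitFieldExtract(word, ByteBit(3), 8, true));
    }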