diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index dc6825a00..046ee55a5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -234,7 +234,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, const std::size_t size_in_bytes = code.size() * sizeof(u64); auto registry = std::make_shared(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, shader_type, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); // TODO(Rodrigo): Handle VertexA shaders // std::optional ir_b; // if (!code_b.empty()) { @@ -264,7 +264,7 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog auto& engine = params.system.GPU().KeplerCompute(); auto registry = std::make_shared(ShaderType::Compute, engine); - const ShaderIR ir(code, ShaderType::Compute, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); @@ -341,7 +341,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const bool is_compute = entry.type == ShaderType::Compute; const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, entry.type, main_offset, COMPILER_SETTINGS, *registry); + const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); std::shared_ptr program; if (precompiled_entry) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7a8824ebc..557b9d662 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -162,7 +162,7 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag ProgramCode program_code, u32 main_offset) : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, - shader_ir{this->program_code, stage, main_offset, compiler_settings, registry}, + shader_ir{this->program_code, main_offset, compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} CachedShader::~CachedShader() = default; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 8d4530386..68913085f 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -271,6 +271,40 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { } } // Anonymous namespace +Node ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, + const Node original_value, bool* is_signed) { + switch (component_type) { + case ComponentType::SNORM: { + *is_signed = true; + // range [-1.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate((1 << component_size) / 2.f - 1.f)); + cnv_value = SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)); + return BitfieldExtract(std::move(cnv_value), 0, component_size); + } + case ComponentType::SINT: + case 
ComponentType::UNORM: { + *is_signed = component_type == ComponentType::SINT; + // range [0.0, 1.0] + auto cnv_value = + Operation(OperationCode::FMul, original_value, Immediate((1 << component_size) - 1.f)); + return SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)); + } + case ComponentType::UINT: // range [0, (1 << component_size) - 1] + *is_signed = false; + return original_value; + case ComponentType::FLOAT: + if (component_size == 16) { + return Operation(OperationCode::HCastFloat, original_value); + } else { + return original_value; + } + default: + UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + return original_value; + } +} + u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -309,7 +343,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32); + UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && + instr.suldst.GetStoreDataLayout() != StoreType::Bits64); auto descriptor = [this, instr] { std::optional descriptor; @@ -333,7 +368,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (instr.suldst.GetStoreDataLayout()) { case StoreType::Bits32: { u32 shifted_counter = 0; - // value should be RGBA format Node value = Immediate(0); for (u32 element = 0; element < 4; ++element) { if (!IsComponentEnabled(comp_mask, element)) { @@ -343,39 +377,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const auto component_size = GetComponentSize(descriptor.format, element); bool is_signed = true; MetaImage meta{image, {}, element}; - const Node original_value = - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - Node converted_value = [&] { - switch (component_type) { - case 
ComponentType::SNORM: { - is_signed = true; - // range [-1.0, 1.0] - auto cnv_value = - Operation(OperationCode::FMul, original_value, Immediate(127.f)); - cnv_value = SignedOperation(OperationCode::ICastFloat, is_signed, - std::move(cnv_value)); - return BitfieldExtract(std::move(cnv_value), 0, 8); - } - case ComponentType::SINT: - case ComponentType::UNORM: { - is_signed = false; - // range [0.0, 1.0] - auto cnv_value = - Operation(OperationCode::FMul, original_value, Immediate(255.f)); - return SignedOperation(OperationCode::ICastFloat, is_signed, - std::move(cnv_value)); - } - case ComponentType::UINT: // range [0, 255] - is_signed = false; - return original_value; - case ComponentType::FLOAT: - return original_value; - default: - UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); - return original_value; - } - }(); + Node converted_value = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)), + &is_signed); + // shift element to correct position const auto shifted = shifted_counter; if (shifted > 0) { @@ -391,6 +398,56 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0.Value(), std::move(value)); break; } + case StoreType::Bits64: { + u32 indexer = 0; + u32 shifted_counter = 0; + Node value = Immediate(0); + for (u32 element = 0; element < 4; ++element) { + if (!IsComponentEnabled(comp_mask, element)) { + continue; + } + const auto component_type = GetComponentType(descriptor, element); + const auto component_size = GetComponentSize(descriptor.format, element); + + bool is_signed = true; + MetaImage meta{image, {}, element}; + + Node converted_value = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)), + &is_signed); + + // shift element to correct position + const auto shifted = shifted_counter; + if (shifted > 0) { + converted_value = + 
SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, + std::move(converted_value), Immediate(shifted)); + } + shifted_counter += component_size; + + // add value into result + value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); + + // once 32 bits have been packed -> save the word into a temporary + if (shifted_counter >= 32) { + SetTemporary(bb, indexer++, std::move(value)); + + // Bits64 needs at most two 32-bit words + if (indexer >= 2) { + break; + } + + // reset counter and value to prepare packing the next word + value = Immediate(0); + shifted_counter = 0; + } + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } default: UNREACHABLE(); break; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index bbd86a2c5..baf7188d2 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -24,10 +24,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, Tegra::Engines::ShaderType shader_stage, - u32 main_offset, CompilerSettings settings, Registry& registry) - : program_code{program_code}, shader_stage{shader_stage}, - main_offset{main_offset}, settings{settings}, registry{registry} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + Registry& registry) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { Decode(); PostDecode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e531181cd..408cce71e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -68,8 +68,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, Tegra::Engines::ShaderType shader_stage, - u32 main_offset, 
CompilerSettings settings, Registry& registry); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + Registry& registry); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -312,6 +312,10 @@ private: /// Conditionally saturates a half float pair Node GetSaturatedHalfFloat(Node value, bool saturate = true); + /// Get image component value by type and size + Node GetComponentValue(Tegra::Texture::ComponentType component_type, u32 component_size, + const Node original_value, bool* is_signed); + /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate comparing two integers @@ -419,7 +423,6 @@ private: u32 NewCustomVariable(); const ProgramCode& program_code; - const Tegra::Engines::ShaderType shader_stage; const u32 main_offset; const CompilerSettings settings; Registry& registry;