From 640fc1418b08449f2ba729588aef5e06bc5df636 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 24 Dec 2021 17:53:36 -0500 Subject: [PATCH 1/5] emit_glsl_floating_point: Fix FPNeg on newer Nvidia drivers --- .../backend/glsl/emit_glsl_floating_point.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index b765a251b..474189d87 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -125,11 +125,11 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i } void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF32("{}=-({});", inst, value); + ctx.AddF32("{}=0.f-({});", inst, value); } void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddF64("{}=-({});", inst, value); + ctx.AddF64("{}=double(0.)-({});", inst, value); } void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { From 14ac0c2923c41df9c6fc4833d2a8e46a6efe5b59 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 24 Dec 2021 20:00:28 -0500 Subject: [PATCH 2/5] shader: Add integer attribute get optimization pass Works around an nvidia driver bug, where casting the integer attributes to float and back to an integer always returned 0. 
--- .../glasm/emit_glasm_context_get_set.cpp | 16 ++++++++++++ .../backend/glasm/emit_glasm_instructions.h | 1 + .../glsl/emit_glsl_context_get_set.cpp | 16 ++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 2 ++ .../spirv/emit_spirv_context_get_set.cpp | 25 +++++++++++++++++++ .../backend/spirv/emit_spirv_instructions.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../ir_opt/collect_shader_info_pass.cpp | 1 + .../ir_opt/constant_propagation_pass.cpp | 23 +++++++++++++++++ 9 files changed, 86 insertions(+) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index 081b2c8e0..c0f5fc402 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -126,6 +126,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal } } +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) { + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.Add("MOV.S {}.x,primitive.id;", inst); + break; + case IR::Attribute::InstanceId: + ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); + break; + case IR::Attribute::VertexId: + ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); + break; + default: + throw NotImplementedException("Get U32 attribute {}", attr); + } +} + void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { const u32 element{static_cast(attr) % 4}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 1f343bff5..b48007856 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const 
IR::Value& binding, void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 6477bd192..5ef46d634 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -221,6 +221,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, } } +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) { + switch (attr) { + case IR::Attribute::PrimitiveId: + ctx.AddU32("{}=uint(gl_PrimitiveID);", inst); + break; + case IR::Attribute::InstanceId: + ctx.AddU32("{}=uint(gl_InstanceID);", inst); + break; + case IR::Attribute::VertexId: + ctx.AddU32("{}=uint(gl_VertexID);", inst); + break; + default: + throw NotImplementedException("Get U32 attribute {}", attr); + } +} + void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, [[maybe_unused]] std::string_view vertex) { if (IR::IsGeneric(attr)) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index f86502e4c..6cabbc717 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view vertex); +void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14f470812..8ea730c80 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { } } +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { + switch (attr) { + case IR::Attribute::PrimitiveId: + return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; + return ctx.OpISub(ctx.U32[1], index, base); + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); + } else { + const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; + const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; + return ctx.OpISub(ctx.U32[1], index, base); + } + default: + throw NotImplementedException("Read U32 attribute {}", attr); + } +} + void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { 
const std::optional output{OutputAttrPointer(ctx, attr)}; if (!output) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 6cd22dd3e..887112deb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6929919df..b94ce7406 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -40,6 +40,7 @@ OPCODE(GetCbufU32, U32, U32, OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU32x2, U32x2, U32, U32, ) OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(GetAttributeU32, U32, Attribute, U32, ) OPCODE(SetAttribute, Void, Attribute, F32, U32, ) OPCODE(GetAttributeIndexed, F32, U32, U32, ) OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1e476d83d..a78c469be 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { 
info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: + case IR::Opcode::GetAttributeU32: info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index d089fdd12..c134a12bc 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } } + if constexpr (op == IR::Opcode::BitCastU32F32) { + // Workaround for new NVIDIA driver bug, where: + // uint attr = ftou(itof(gl_InstanceID)); + // always returned 0. + // We can instead manually optimize this and work around the driver bug: + // uint attr = uint(gl_InstanceID); + if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) { + const IR::Attribute attr{arg_inst->Arg(0).Attribute()}; + switch (attr) { + case IR::Attribute::PrimitiveId: + case IR::Attribute::InstanceId: + case IR::Attribute::VertexId: + break; + default: + return; + } + // Replace the bitcasts with an integer attribute get + inst.ReplaceOpcode(IR::Opcode::GetAttributeU32); + inst.SetArg(0, arg_inst->Arg(0)); + inst.SetArg(1, arg_inst->Arg(1)); + return; + } + } } void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { From 9f34be5a6176674c9f4be0e2636cf1c01c067e69 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 25 Dec 2021 16:59:55 -0500 Subject: [PATCH 3/5] emit_glsl_integer: Use negation work around --- src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 44060df33..b0d85be99 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -87,11 +87,11 @@ void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin } void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU32("{}=uint(-({}));", inst, value); + ctx.AddU32("{}=uint(int(0)-int({}));", inst, value); } void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { - ctx.AddU64("{}=-({});", inst, value); + ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value); } void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { From b84d429c2ec59e54a89d9d4e34b0df9f22172e8f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 27 Dec 2021 23:59:32 -0500 Subject: [PATCH 4/5] glsl_context_get_set: Add alternative cbuf type for broken drivers Some drivers have a bug when bitwise converting floating point cbuf values to uint variables. This adds a workaround for these drivers to make all cbufs uint and convert to floating point as needed. 
--- .../glsl/emit_glsl_context_get_set.cpp | 35 +++++++++++-------- .../backend/glsl/glsl_emit_context.cpp | 7 ++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_device.cpp | 9 ++--- src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_shader_cache.cpp | 1 + 6 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 5ef46d634..0c1fbc7b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf8(ctx, inst, binding, offset, "ftou"); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf8(ctx, inst, binding, offset, cast); } void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf8(ctx, inst, binding, offset, "ftoi"); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; + GetCbuf8(ctx, inst, binding, offset, cast); } void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf16(ctx, inst, binding, offset, "ftou"); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf16(ctx, inst, binding, offset, cast); } void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { - GetCbuf16(ctx, inst, binding, offset, "ftoi"); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? 
"int" : "ftoi"}; + GetCbuf16(ctx, inst, binding, offset, cast); } void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; - GetCbuf(ctx, ret, binding, offset, 32, "ftou"); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; + GetCbuf(ctx, ret, binding, offset, 32, cast); } void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; - GetCbuf(ctx, ret, binding, offset, 32); + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""}; + GetCbuf(ctx, ret, binding, offset, 32, cast); } void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; + const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; if (offset.IsImmediate()) { static constexpr u32 cbuf_size{0x10000}; const u32 u32_offset{offset.U32()}; @@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding return; } if (u32_offset % 2 == 0) { - ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, + ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16, OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); } else { - ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, - OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, - OffsetSwizzle(u32_offset + 4)); + ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf, + u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf, + (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4)); } return; } const auto offset_var{ctx.var_alloc.Consume(offset)}; if (!ctx.profile.has_gl_component_indexing_bug) { - 
ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", - inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); + ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst, + cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var); return; } const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, - swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, + ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset, + swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var, "xyzw"[(swizzle + 1) % 4]); } } diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index bc9d2a904..bb7f1a0fd 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { return; } for (const auto& desc : info.constant_buffer_descriptors) { - header += fmt::format( - "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", - bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); + const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? 
"uvec4" : "vec4"}; + header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};", + bindings.uniform_buffer, stage_name, desc.index, cbuf_type, + stage_name, desc.index, 4 * 1024); bindings.uniform_buffer += desc.count; } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f0c3b3b17..9deb3f4bb 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -65,6 +65,8 @@ struct Profile { bool has_gl_component_indexing_bug{}; /// The precise type qualifier is broken in the fragment stage of some drivers bool has_gl_precise_bug{}; + /// Some drivers do not properly support floatBitsToUint when used on cbufs + bool has_gl_cbuf_ftou_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 0764ea6e0..32736126f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,17 +182,12 @@ Device::Device() { shader_backend = Settings::ShaderBackend::GLSL; } - if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia && - !Settings::values.renderer_debug) { + if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) { const std::string_view driver_version = version.substr(13); const int version_major = std::atoi(driver_version.substr(0, driver_version.find(".")).data()); - if (version_major >= 495) { - LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems " - "with yuzu. 
Forcing GLASM as a mitigation."); - shader_backend = Settings::ShaderBackend::GLASM; - use_assembly_shaders = true; + has_cbuf_ftou_bug = true; } } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de9e41659..fe53ef991 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -152,6 +152,10 @@ public: return need_fastmath_off; } + bool HasCbufFtouBug() const { + return has_cbuf_ftou_bug; + } + Settings::ShaderBackend GetShaderBackend() const { return shader_backend; } @@ -200,6 +204,7 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + bool has_cbuf_ftou_bug{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 29c6e1a5f..1efcc3562 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), + .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(), .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, From 8c907c620d830bdaef30ff4316489443775b3ea4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 29 Dec 2021 18:55:42 -0500 Subject: [PATCH 5/5] glsl: Add boolean reference workaround --- .../backend/glsl/emit_glsl_bitwise_conversion.cpp | 4 +++- src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 4 +++- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 
6 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 0f2668d9e..e0ead7a53 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/backend/glsl/glsl_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -30,8 +31,9 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) inst.DestructiveAddUsage(1); const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; const auto input{ctx.var_alloc.Consume(value)}; + const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""}; if (ret != input) { - ctx.Add("{}={};", ret, input); + ctx.Add("{}={}{};", ret, input, suffix); } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index b8ddafe48..fcf620b79 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -90,7 +90,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& if (phi_reg == val_reg) { return; } - ctx.Add("{}={};", phi_reg, val_reg); + const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1}; + const auto suffix{needs_workaround ? 
"?true:false" : ""}; + ctx.Add("{}={}{};", phi_reg, val_reg, suffix); } void EmitPrologue(EmitContext& ctx) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9deb3f4bb..dc4c806ff 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -67,6 +67,8 @@ struct Profile { bool has_gl_precise_bug{}; /// Some drivers do not properly support floatBitsToUint when used on cbufs bool has_gl_cbuf_ftou_bug{}; + /// Some drivers poorly optimize boolean variable references + bool has_gl_bool_ref_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 32736126f..e62912a22 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -188,6 +188,7 @@ Device::Device() { std::atoi(driver_version.substr(0, driver_version.find(".")).data()); if (version_major >= 495) { has_cbuf_ftou_bug = true; + has_bool_ref_bug = true; } } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index fe53ef991..95c2e8d38 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public: return has_cbuf_ftou_bug; } + bool HasBoolRefBug() const { + return has_bool_ref_bug; + } + Settings::ShaderBackend GetShaderBackend() const { return shader_backend; } @@ -205,6 +209,7 @@ private: bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; bool has_cbuf_ftou_bug{}; + bool has_bool_ref_bug{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1efcc3562..ec558a9af 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -215,6 +215,7 @@ 
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(), + .has_gl_bool_ref_bug = device.HasBoolRefBug(), .ignore_nan_fp_comparisons = true, .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), },