From 2c2e019a44e353921d5fdf3cc6ab4304502eb2bd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 28 Jan 2023 16:09:58 -0500 Subject: [PATCH] shader_recompiler: TXQ: Skip QueryLevels when possible --- .../backend/glasm/emit_glasm_image.cpp | 2 +- .../backend/glasm/emit_glasm_instructions.h | 2 +- .../backend/glsl/emit_glsl_image.cpp | 20 +++++++++---------- .../backend/glsl/emit_glsl_instructions.h | 2 +- .../backend/spirv/emit_spirv_image.cpp | 6 ++++-- .../backend/spirv/emit_spirv_instructions.h | 3 ++- .../frontend/ir/ir_emitter.cpp | 9 +++++---- .../frontend/ir/ir_emitter.h | 5 +++-- src/shader_recompiler/frontend/ir/opcodes.inc | 6 +++--- .../maxwell/translate/impl/texture_query.cpp | 8 +++++--- src/shader_recompiler/ir_opt/texture_pass.cpp | 3 ++- 11 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e67e80fac..ec633a297 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -531,7 +531,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod) { + ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) { const auto info{inst.Flags()}; const std::string texture{Texture(ctx, info, index)}; const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index eaaf9ba39..d16fb7d37 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -582,7 +582,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - ScalarS32 lod); + ScalarS32 lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& derivatives, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cecdbb9d6..a7e433f7d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -455,27 +455,27 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { + std::string_view lod, const IR::Value& skip_mips_val) { const auto info{inst.Flags()}; const auto texture{Texture(ctx, info, index)}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{ + [&] { return skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture); }}; switch (info.type) { case TextureType::Color1D: - return ctx.AddU32x4( - "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uint(textureSize({},int({}))),0u,0u,{});", inst, texture, lod, + mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: case TextureType::Color2DRect: - return ctx.AddU32x4( - "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, - texture, lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({},int({}))),0u,{});", inst, texture, lod, + mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - return ctx.AddU32x4( - "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, - lod, texture); + return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({},int({}))),{});", inst, texture, lod, + mips()); case TextureType::Buffer: throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4151c89de..02f91f2f4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -655,7 +655,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, std::string_view ms); void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod); + std::string_view lod, const IR::Value& skip_mips); void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords); void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index c898ce12f..3b969d915 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -445,11 +445,13 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips_val) { const auto info{inst->Flags()}; const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; - const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + const bool skip_mips{skip_mips_val.U1()}; + const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { case TextureType::Color1D: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e31cdc5e8..1b99f648d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -540,7 +540,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, + const IR::Value& skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index eb2e49a68..8b379c920 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1851,15 +1851,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } -Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions : Opcode::BindlessImageQueryDimensions}; - return Inst(op, handle, lod); + return Inst(op, handle, lod, skip_mips); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info) { - return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod); + const IR::U1& skip_mips, TextureInstInfo info) { + return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips); } Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7aaaa4ab0..df158c928 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -320,9 +320,10 @@ public: [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& lod, const Value& offset, TextureInstInfo info); - [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, - TextureInstInfo info); + const IR::U1& skip_mips); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod, + const IR::U1& skip_mips, TextureInstInfo info); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1fe3749cc..4ba262b84 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -483,7 +483,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) @@ -496,7 +496,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, U1, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageRead, U32x4, U32, Opaque, ) @@ -509,7 +509,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, Opaq OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index f8cfd4ab6..39af62559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -15,11 +15,13 @@ enum class Mode : u64 { SamplePos = 5, }; -IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) { switch (mode) { case Mode::Dimension: { + const bool needs_num_mips{((mask >> 3) & 1) != 0}; + const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)}; const IR::U32 lod{v.X(src_reg)}; - return v.ir.ImageQueryDimension(handle, lod); + return v.ir.ImageQueryDimension(handle, lod, skip_mips); } case Mode::TextureType: case Mode::SamplePos: @@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { handle = v.X(src_reg); ++src_reg; } - const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)}; IR::Reg dest_reg{txq.dest_reg}; for (int element = 0; element < 4; ++element) { if (((txq.mask >> element) & 1) == 0) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 9718c6921..fdc350ef4 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -452,7 +452,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { const IR::Value coord(inst.Arg(1)); const IR::Value handle(ir.Imm32(0)); const IR::U32 lod{ir.Imm32(0)}; - const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info); + const IR::U1 skip_mips{ir.Imm1(true)}; + const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info); inst.SetArg( 1, ir.CompositeConstruct( ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)),