yuzu-emu
/
yuzu-android
Archived
1
0
Fork 0

shader: Implement indexed attributes

This commit is contained in:
FernandoS27 2021-04-04 06:47:14 +02:00 committed by ameerj
parent 0df7e509db
commit 1d51803169
12 changed files with 279 additions and 35 deletions

View File

@ -82,6 +82,28 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
} }
throw InvalidArgument("Invalid attribute type {}", type); throw InvalidArgument("Invalid attribute type {}", type);
} }
// Describes how a single component of a generic input attribute is read.
struct AttrInfo {
// Pointer type handed to OpAccessChain when addressing one component of the attribute.
Id pointer;
// Result type handed to OpLoad when reading through that pointer.
Id id;
// True when the loaded value has to be bitcast to F32 before it is returned.
bool needs_cast;
};
// Looks up the type the profile declares for generic input `index` and returns the matching
// pointer type, value type and "needs bitcast" flag.
//
// Returns std::nullopt for AttributeType::Disabled.
// Throws InvalidArgument when the profile holds a value outside the known enumerators;
// `.at(index)` additionally throws when `index` is out of range.
std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
    const AttributeType input_type{ctx.profile.generic_input_types.at(index)};
    if (input_type == AttributeType::Disabled) {
        return std::nullopt;
    }
    if (input_type == AttributeType::Float) {
        // Already a float: no bitcast required.
        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
    }
    if (input_type == AttributeType::UnsignedInt) {
        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
    }
    if (input_type == AttributeType::SignedInt) {
        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
    }
    throw InvalidArgument("Invalid attribute type {}", input_type);
}
} // Anonymous namespace } // Anonymous namespace
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@ -107,6 +129,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
DefineConstantBuffers(program.info, binding); DefineConstantBuffers(program.info, binding);
DefineStorageBuffers(program.info, binding); DefineStorageBuffers(program.info, binding);
DefineTextures(program.info, binding); DefineTextures(program.info, binding);
DefineAttributeMemAccess(program.info);
DefineLabels(program); DefineLabels(program);
} }
@ -290,6 +313,107 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
} }
} }
void EmitContext::DefineAttributeMemAccess(const Info& info) {
const auto make_load{[&]() {
const Id end_block{OpLabel()};
const Id default_label{OpLabel()};
const Id func_type_load{TypeFunction(F32[1], U32[1])};
const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)};
const Id offset{OpFunctionParameter(U32[1])};
AddLabel();
const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))};
const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))};
const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
std::vector<Sirit::Literal> literals;
std::vector<Id> labels;
const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
for (u32 i = 0; i < info.input_generics.size(); i++) {
if (!info.input_generics[i].used) {
continue;
}
literals.push_back(base_attribute_value + i);
labels.push_back(OpLabel());
}
OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
OpSwitch(compare_index, default_label, literals, labels);
AddLabel(default_label);
OpReturnValue(Constant(F32[1], 0.0f));
size_t label_index = 0;
for (u32 i = 0; i < info.input_generics.size(); i++) {
if (!info.input_generics[i].used) {
continue;
}
AddLabel(labels[label_index]);
const auto type{AttrTypes(*this, i)};
if (!type) {
OpReturnValue(Constant(F32[1], 0.0f));
label_index++;
continue;
}
const Id generic_id{input_generics.at(i)};
const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)};
const Id value{OpLoad(type->id, pointer)};
const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
OpReturnValue(result);
label_index++;
}
AddLabel(end_block);
OpUnreachable();
OpFunctionEnd();
return func;
}};
const auto make_store{[&]() {
const Id end_block{OpLabel()};
const Id default_label{OpLabel()};
const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])};
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)};
const Id offset{OpFunctionParameter(U32[1])};
const Id store_value{OpFunctionParameter(F32[1])};
AddLabel();
const Id base_index{OpShiftRightLogical(U32[1], offset, Constant(U32[1], 2U))};
const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))};
const Id compare_index{OpShiftRightLogical(U32[1], base_index, Constant(U32[1], 2U))};
std::vector<Sirit::Literal> literals;
std::vector<Id> labels;
const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2;
for (u32 i = 0; i < info.stores_generics.size(); i++) {
if (!info.stores_generics[i]) {
continue;
}
literals.push_back(base_attribute_value + i);
labels.push_back(OpLabel());
}
OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone);
OpSwitch(compare_index, default_label, literals, labels);
AddLabel(default_label);
OpReturn();
size_t label_index = 0;
for (u32 i = 0; i < info.stores_generics.size(); i++) {
if (!info.stores_generics[i]) {
continue;
}
AddLabel(labels[label_index]);
const Id generic_id{output_generics.at(i)};
const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
OpStore(pointer, store_value);
OpReturn();
label_index++;
}
AddLabel(end_block);
OpUnreachable();
OpFunctionEnd();
return func;
}};
if (info.loads_indexed_attributes) {
indexed_load_func = make_load();
}
if (info.stores_indexed_attributes) {
indexed_store_func = make_store();
}
}
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
if (info.constant_buffer_descriptors.empty()) { if (info.constant_buffer_descriptors.empty()) {
return; return;

View File

@ -116,6 +116,9 @@ public:
Id fswzadd_lut_a{}; Id fswzadd_lut_a{};
Id fswzadd_lut_b{}; Id fswzadd_lut_b{};
Id indexed_load_func{};
Id indexed_store_func{};
Id local_memory{}; Id local_memory{};
Id shared_memory_u8{}; Id shared_memory_u8{};
@ -148,6 +151,7 @@ private:
void DefineConstantBuffers(const Info& info, u32& binding); void DefineConstantBuffers(const Info& info, u32& binding);
void DefineStorageBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding);
void DefineTextures(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding);
void DefineAttributeMemAccess(const Info& info);
void DefineLabels(IR::Program& program); void DefineLabels(IR::Program& program);
void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding,

View File

@ -51,8 +51,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
void EmitGetAttributeIndexed(EmitContext& ctx); Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset);
void EmitSetAttributeIndexed(EmitContext& ctx); void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value);
void EmitGetZFlag(EmitContext& ctx); void EmitGetZFlag(EmitContext& ctx);

View File

@ -216,12 +216,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) {
ctx.OpStore(*output, value); ctx.OpStore(*output, value);
} }
void EmitGetAttributeIndexed(EmitContext&) { Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset);
} }
void EmitSetAttributeIndexed(EmitContext&) { void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) {
throw NotImplementedException("SPIR-V Instruction"); ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value);
} }
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {

View File

@ -307,6 +307,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
Inst(Opcode::SetAttribute, attribute, value); Inst(Opcode::SetAttribute, attribute, value);
} }
// Builds a GetAttributeIndexed instruction through Inst<F32> with `phys_address` as its only
// operand and returns the resulting F32 value.
F32 IREmitter::GetAttributeIndexed(IR::U32 phys_address) {
return Inst<F32>(Opcode::GetAttributeIndexed, phys_address);
}
// Builds a SetAttributeIndexed instruction through Inst with `phys_address` and `value` as its
// operands. Nothing is returned to the caller.
void IREmitter::SetAttributeIndexed(IR::U32 phys_address, const F32& value) {
Inst(Opcode::SetAttributeIndexed, phys_address, value);
}
void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
} }

View File

@ -76,6 +76,9 @@ public:
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
void SetAttribute(IR::Attribute attribute, const F32& value); void SetAttribute(IR::Attribute attribute, const F32& value);
[[nodiscard]] F32 GetAttributeIndexed(IR::U32 phys_address);
void SetAttributeIndexed(IR::U32 phys_address, const F32& value);
void SetFragColor(u32 index, u32 component, const F32& value); void SetFragColor(u32 index, u32 component, const F32& value);
void SetFragDepth(const F32& value); void SetFragDepth(const F32& value);

View File

@ -87,7 +87,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::DeadCodeEliminationPass(program); Optimization::DeadCodeEliminationPass(program);
Optimization::IdentityRemovalPass(program); Optimization::IdentityRemovalPass(program);
Optimization::VerificationPass(program); Optimization::VerificationPass(program);
Optimization::CollectShaderInfoPass(program); Optimization::CollectShaderInfoPass(env, program);
CollectInterpolationInfo(env, program); CollectInterpolationInfo(env, program);
return program; return program;
} }

View File

@ -31,7 +31,7 @@ enum class SampleMode : u64 {
Offset, Offset,
}; };
int NumElements(Size size) { u32 NumElements(Size size) {
switch (size) { switch (size) {
case Size::B32: case Size::B32:
return 1; return 1;
@ -65,15 +65,21 @@ void TranslatorVisitor::ALD(u64 insn) {
if (ald.patch != 0) { if (ald.patch != 0) {
throw NotImplementedException("P"); throw NotImplementedException("P");
} }
if (ald.index_reg != IR::Reg::RZ) {
throw NotImplementedException("Indexed");
}
const u64 offset{ald.absolute_offset.Value()}; const u64 offset{ald.absolute_offset.Value()};
if (offset % 4 != 0) { if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset); throw NotImplementedException("Unaligned absolute offset {}", offset);
} }
const int num_elements{NumElements(ald.size)}; const u32 num_elements{NumElements(ald.size)};
for (int element = 0; element < num_elements; ++element) { if (ald.index_reg != IR::Reg::RZ) {
const IR::U32 index_value = X(ald.index_reg);
for (u32 element = 0; element < num_elements; ++element) {
const IR::U32 final_offset =
element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))};
F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset));
}
return;
}
for (u32 element = 0; element < num_elements; ++element) {
F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element}));
} }
} }
@ -103,8 +109,17 @@ void TranslatorVisitor::AST(u64 insn) {
if (offset % 4 != 0) { if (offset % 4 != 0) {
throw NotImplementedException("Unaligned absolute offset {}", offset); throw NotImplementedException("Unaligned absolute offset {}", offset);
} }
const int num_elements{NumElements(ast.size)}; const u32 num_elements{NumElements(ast.size)};
for (int element = 0; element < num_elements; ++element) { if (ast.index_reg != IR::Reg::RZ) {
const IR::U32 index_value = X(ast.index_reg);
for (u32 element = 0; element < num_elements; ++element) {
const IR::U32 final_offset =
element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))};
ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element));
}
return;
}
for (u32 element = 0; element < num_elements; ++element) {
ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element));
} }
} }
@ -134,12 +149,9 @@ void TranslatorVisitor::IPA(u64 insn) {
// gl_FragColor = colors[idx]; // gl_FragColor = colors[idx];
// } // }
const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
if (is_indexed) {
throw NotImplementedException("IDX");
}
const IR::Attribute attribute{ipa.attribute}; const IR::Attribute attribute{ipa.attribute};
IR::F32 value{ir.GetAttribute(attribute)}; IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
: ir.GetAttribute(attribute)};
if (IR::IsGeneric(attribute)) { if (IR::IsGeneric(attribute)) {
const ProgramHeader& sph{env.SPH()}; const ProgramHeader& sph{env.SPH()};
const u32 attr_index{IR::GenericAttributeIndex(attribute)}; const u32 attr_index{IR::GenericAttributeIndex(attribute)};

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/microinstruction.h"
#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/program.h"
@ -323,6 +324,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::SetAttribute: case IR::Opcode::SetAttribute:
SetAttribute(info, inst.Arg(0).Attribute()); SetAttribute(info, inst.Arg(0).Attribute());
break; break;
case IR::Opcode::GetAttributeIndexed:
info.loads_indexed_attributes = true;
break;
case IR::Opcode::SetAttributeIndexed:
info.stores_indexed_attributes = true;
break;
case IR::Opcode::SetFragColor: case IR::Opcode::SetFragColor:
info.stores_frag_color[inst.Arg(0).U32()] = true; info.stores_frag_color[inst.Arg(0).U32()] = true;
break; break;
@ -502,15 +509,42 @@ void Visit(Info& info, IR::Inst& inst) {
VisitUsages(info, inst); VisitUsages(info, inst);
VisitFpModifiers(info, inst); VisitFpModifiers(info, inst);
} }
// Folds the usage bits found in the shader program header into `info`.
//
// Flags are only ever raised, never cleared: anything already set in `info` stays set.
//   * Compute stage: nothing is read from the header.
//   * Fragment stage: generic inputs come from the `ps` view of the header.
//   * Any other stage: generic inputs, generic outputs and clip distances come from the `vtg`
//     view of the header.
void GatherInfoFromHeader(Environment& env, Info& info) {
    const auto stage = env.ShaderStage();
    if (stage == Stage::Compute) {
        return;
    }
    // The header is only queried once the compute stage has been ruled out.
    const auto& sph = env.SPH();
    const size_t num_inputs = info.input_generics.size();

    if (stage == Stage::Fragment) {
        for (size_t index = 0; index < num_inputs; ++index) {
            if (!info.input_generics[index].used && sph.ps.IsGenericVectorActive(index)) {
                info.input_generics[index].used = true;
            }
        }
        return;
    }

    for (size_t index = 0; index < num_inputs; ++index) {
        if (!info.input_generics[index].used && sph.vtg.IsInputGenericVectorActive(index)) {
            info.input_generics[index].used = true;
        }
    }
    const size_t num_outputs = info.stores_generics.size();
    for (size_t index = 0; index < num_outputs; ++index) {
        if (!info.stores_generics[index] && sph.vtg.IsOutputGenericVectorActive(index)) {
            info.stores_generics[index] = true;
        }
    }
    if (!info.stores_clip_distance && sph.vtg.omap_systemc.clip_distances != 0) {
        info.stores_clip_distance = true;
    }
}
} // Anonymous namespace } // Anonymous namespace
void CollectShaderInfoPass(IR::Program& program) { void CollectShaderInfoPass(Environment& env, IR::Program& program) {
Info& info{program.info}; Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst); Visit(info, inst);
} }
} }
GatherInfoFromHeader(env, info);
} }
} // namespace Shader::Optimization } // namespace Shader::Optimization

View File

@ -12,7 +12,7 @@
namespace Shader::Optimization { namespace Shader::Optimization {
void CollectShaderInfoPass(IR::Program& program); void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConstantPropagationPass(IR::Program& program); void ConstantPropagationPass(IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program); void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program);

View File

@ -68,10 +68,24 @@ struct ProgramHeader {
union { union {
struct { struct {
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union {
BitField<0, 1, u8> x;
BitField<1, 1, u8> y;
BitField<2, 1, u8> z;
BitField<3, 1, u8> w;
BitField<4, 1, u8> x2;
BitField<5, 1, u8> y2;
BitField<6, 1, u8> z2;
BitField<7, 1, u8> w2;
BitField<0, 4, u8> first;
BitField<4, 4, u8> second;
u8 raw;
} imap_generic_vector[16];
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
union { union {
BitField<0, 8, u16> clip_distances; BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s; BitField<8, 1, u16> point_sprite_s;
@ -82,15 +96,54 @@ struct ProgramHeader {
BitField<14, 1, u16> instance_id; BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id; BitField<15, 1, u16> vertex_id;
}; };
INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
INSERT_PADDING_BYTES_NOINIT(2); // OmapColor union {
INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC BitField<0, 1, u8> x;
INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] BitField<1, 1, u8> y;
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved BitField<2, 1, u8> z;
BitField<3, 1, u8> w;
BitField<4, 1, u8> x2;
BitField<5, 1, u8> y2;
BitField<6, 1, u8> z2;
BitField<7, 1, u8> w2;
BitField<0, 4, u8> first;
BitField<4, 4, u8> second;
u8 raw;
} omap_generic_vector[16];
INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
BitField<9, 1, u16> point_sprite_t;
BitField<10, 1, u16> fog_coordinate;
BitField<12, 1, u16> tessellation_eval_point_u;
BitField<13, 1, u16> tessellation_eval_point_v;
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
} omap_systemc;
INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
// Reports whether any component bit of input generic vector `index` is set.
// Each imap_generic_vector byte packs two vectors: even indices live in `first` (bits 0-3),
// odd indices in `second` (bits 4-7). No bounds check is performed on `index`.
[[nodiscard]] bool IsInputGenericVectorActive(size_t index) const {
    const auto& packed_pair = imap_generic_vector[index / 2];
    const bool is_even = index % 2 == 0;
    return is_even ? packed_pair.first != 0 : packed_pair.second != 0;
}
// Reports whether any component bit of output generic vector `index` is set.
// Each omap_generic_vector byte packs two vectors: even indices live in `first` (bits 0-3),
// odd indices in `second` (bits 4-7). No bounds check is performed on `index`.
[[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const {
    const auto& packed_pair = omap_generic_vector[index / 2];
    const bool is_even = index % 2 == 0;
    return is_even ? packed_pair.first != 0 : packed_pair.second != 0;
}
} vtg; } vtg;
struct { struct {
@ -128,6 +181,10 @@ struct ProgramHeader {
const auto& vector{imap_generic_vector[attribute]}; const auto& vector{imap_generic_vector[attribute]};
return {vector.x, vector.y, vector.z, vector.w}; return {vector.x, vector.y, vector.z, vector.w};
} }
// Reports whether the `raw` value of imap_generic_vector[index] is non-zero.
// No bounds check is performed on `index`.
// NOTE(review): presumably `raw` aliases all per-component bits of the entry — the union
// layout is not visible here, confirm against its declaration.
[[nodiscard]] bool IsGenericVectorActive(size_t index) const {
return imap_generic_vector[index].raw != 0;
}
} ps; } ps;
std::array<u32, 0xf> raw; std::array<u32, 0xf> raw;

View File

@ -76,6 +76,7 @@ struct Info {
bool loads_vertex_id{}; bool loads_vertex_id{};
bool loads_front_face{}; bool loads_front_face{};
bool loads_point_coord{}; bool loads_point_coord{};
bool loads_indexed_attributes{};
std::array<bool, 8> stores_frag_color{}; std::array<bool, 8> stores_frag_color{};
bool stores_frag_depth{}; bool stores_frag_depth{};
@ -84,6 +85,7 @@ struct Info {
bool stores_point_size{}; bool stores_point_size{};
bool stores_clip_distance{}; bool stores_clip_distance{};
bool stores_viewport_index{}; bool stores_viewport_index{};
bool stores_indexed_attributes{};
bool uses_fp16{}; bool uses_fp16{};
bool uses_fp64{}; bool uses_fp64{};