Merge pull request #4391 from lioncash/nrvo
video_core: Allow copy elision to take place where applicable
This commit is contained in:
commit
f650cf8a9a
|
@ -1704,7 +1704,7 @@ std::string ARBDecompiler::HCastFloat(Operation operation) {
|
|||
}
|
||||
|
||||
std::string ARBDecompiler::HUnpack(Operation operation) {
|
||||
const std::string operand = Visit(operation[0]);
|
||||
std::string operand = Visit(operation[0]);
|
||||
switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
|
||||
case Tegra::Shader::HalfType::H0_H1:
|
||||
return operand;
|
||||
|
@ -2054,7 +2054,7 @@ std::string ARBDecompiler::InvocationId(Operation) {
|
|||
|
||||
std::string ARBDecompiler::YNegate(Operation) {
|
||||
LOG_WARNING(Render_OpenGL, "(STUBBED)");
|
||||
const std::string temporary = AllocTemporary();
|
||||
std::string temporary = AllocTemporary();
|
||||
AddLine("MOV.F {}, 1;", temporary);
|
||||
return temporary;
|
||||
}
|
||||
|
|
|
@ -126,7 +126,7 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
|||
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
||||
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
|
||||
entry.graphics_info, entry.compute_info};
|
||||
const auto registry = std::make_shared<Registry>(entry.type, info);
|
||||
auto registry = std::make_shared<Registry>(entry.type, info);
|
||||
for (const auto& [address, value] : entry.keys) {
|
||||
const auto [buffer, offset] = address;
|
||||
registry->InsertKey(buffer, offset, value);
|
||||
|
|
|
@ -1919,7 +1919,7 @@ private:
|
|||
Expression Comparison(Operation operation) {
|
||||
static_assert(!unordered || type == Type::Float);
|
||||
|
||||
const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
|
||||
Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
|
||||
|
||||
if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
|
||||
// GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
|
||||
|
|
|
@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
|
|||
op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
|
||||
op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
|
||||
|
||||
const Node value = [&]() {
|
||||
const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
|
||||
const Node value = [&] {
|
||||
Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
|
||||
if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
|
||||
return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
|
||||
}
|
||||
const Node shifted = [&]() {
|
||||
const Node shifted = [&] {
|
||||
switch (instr.iadd3.mode) {
|
||||
case Tegra::Shader::IAdd3Mode::RightShift:
|
||||
// TODO(tech4me): According to
|
||||
|
|
|
@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
|
|||
return pc;
|
||||
}
|
||||
|
||||
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
|
||||
Tegra::Shader::VideoType type, u64 byte_height) {
|
||||
Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
|
||||
u64 byte_height) {
|
||||
if (!is_chunk) {
|
||||
return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
|
||||
}
|
||||
const Node zero = Immediate(0);
|
||||
|
||||
switch (type) {
|
||||
case Tegra::Shader::VideoType::Size16_Low:
|
||||
case VideoType::Size16_Low:
|
||||
return BitfieldExtract(op, 0, 16);
|
||||
case Tegra::Shader::VideoType::Size16_High:
|
||||
case VideoType::Size16_High:
|
||||
return BitfieldExtract(op, 16, 16);
|
||||
case Tegra::Shader::VideoType::Size32:
|
||||
case VideoType::Size32:
|
||||
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
|
||||
// (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
|
||||
UNIMPLEMENTED();
|
||||
return zero;
|
||||
case Tegra::Shader::VideoType::Invalid:
|
||||
return Immediate(0);
|
||||
case VideoType::Invalid:
|
||||
UNREACHABLE_MSG("Invalid instruction encoding");
|
||||
return zero;
|
||||
return Immediate(0);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return zero;
|
||||
return Immediate(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
|
|||
SetTemporary(bb, 0, product);
|
||||
product = GetTemporary(0);
|
||||
|
||||
const Node original_c = op_c;
|
||||
Node original_c = op_c;
|
||||
const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
|
||||
op_c = [&]() {
|
||||
op_c = [&] {
|
||||
switch (set_mode) {
|
||||
case Tegra::Shader::XmadMode::None:
|
||||
return original_c;
|
||||
case Tegra::Shader::XmadMode::CLo:
|
||||
return BitfieldExtract(original_c, 0, 16);
|
||||
return BitfieldExtract(std::move(original_c), 0, 16);
|
||||
case Tegra::Shader::XmadMode::CHi:
|
||||
return BitfieldExtract(original_c, 16, 16);
|
||||
return BitfieldExtract(std::move(original_c), 16, 16);
|
||||
case Tegra::Shader::XmadMode::CBcc: {
|
||||
const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
|
||||
original_b, Immediate(16));
|
||||
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
|
||||
Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
|
||||
original_b, Immediate(16));
|
||||
return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
|
||||
std::move(shifted_b));
|
||||
}
|
||||
case Tegra::Shader::XmadMode::CSfu: {
|
||||
const Node comp_a =
|
||||
|
|
|
@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
|
|||
}
|
||||
|
||||
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
|
||||
const Node node = MakeNode<InternalFlagNode>(flag);
|
||||
Node node = MakeNode<InternalFlagNode>(flag);
|
||||
if (negated) {
|
||||
return Operation(OperationCode::LogicalNegate, node);
|
||||
return Operation(OperationCode::LogicalNegate, std::move(node));
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
|
Reference in New Issue