From 04037c7af34a0f68458eb74db0909d65f6f45810 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 29 Nov 2025 10:40:56 +0100 Subject: [PATCH] aco/optimizer: propagate salu fneg Foz-DB Navi48: Totals from 23796 (24.37% of 97637) affected shaders: MaxWaves: 638922 -> 638898 (-0.00%) Instrs: 32968990 -> 32880147 (-0.27%); split: -0.28%, +0.01% CodeSize: 174252352 -> 173922400 (-0.19%); split: -0.20%, +0.01% VGPRs: 1396472 -> 1396592 (+0.01%) SpillSGPRs: 63672 -> 63599 (-0.11%) Latency: 201025393 -> 200966204 (-0.03%); split: -0.05%, +0.02% InvThroughput: 37429702 -> 37411026 (-0.05%); split: -0.06%, +0.01% VClause: 534241 -> 534115 (-0.02%); split: -0.05%, +0.02% SClause: 831765 -> 831559 (-0.02%); split: -0.07%, +0.05% Copies: 2404134 -> 2400539 (-0.15%); split: -0.29%, +0.14% Branches: 728518 -> 728503 (-0.00%); split: -0.00%, +0.00% PreSGPRs: 1337403 -> 1336846 (-0.04%); split: -0.04%, +0.00% PreVGPRs: 1017490 -> 1017521 (+0.00%); split: -0.00%, +0.00% VALU: 18319620 -> 18318960 (-0.00%); split: -0.01%, +0.00% SALU: 5069557 -> 5001384 (-1.34%); split: -1.38%, +0.03% VOPD: 80235 -> 80172 (-0.08%); split: +0.13%, -0.21% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f801bce50c9..b1b8f054658 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2765,13 +2765,15 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) if (!ctx.program->needs_wqm) ctx.info[instr->definitions[0].tempId()].set_constant(0u); break; + case aco_opcode::s_mul_f16: + case aco_opcode::s_mul_f32: case aco_opcode::v_mul_f16: case aco_opcode::v_mul_f32: case aco_opcode::v_mul_legacy_f32: case aco_opcode::v_mul_f64: case aco_opcode::v_mul_f64_e64: { bool uses_mods = instr->usesModifiers(); - bool fp16 = instr->opcode == aco_opcode::v_mul_f16; + bool fp16 = instr->opcode == aco_opcode::v_mul_f16 || instr->opcode == aco_opcode::s_mul_f16; bool fp64 = instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64; unsigned bit_size = fp16 ? 16 : (fp64 ? 64 : 32); @@ -2783,22 +2785,27 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) double constant = extract_float(instr->operands[!i].constantValue64(), bit_size); - if (!instr->isDPP() && !instr->isSDWA() && !instr->valu().opsel && fabs(constant) == 1.0) { - bool neg1 = constant == -1.0; + if (!instr->isDPP() && !instr->isSDWA() && (!instr->isVALU() || !instr->valu().opsel) && + fabs(constant) == 1.0) { + bool neg = constant == -1.0; + bool abs = false; - VALU_instruction* valu = &instr->valu(); - if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp) - continue; + if (instr->isVALU()) { + VALU_instruction* valu = &instr->valu(); + if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp) + continue; + + abs = valu->abs[i]; + neg ^= valu->neg[i]; + } - bool abs = valu->abs[i]; - bool neg = neg1 ^ valu->neg[i]; Temp other = instr->operands[i].getTemp(); - if (abs && neg && other.type() == RegType::vgpr) + if (abs && neg && other.type() == instr->definitions[0].getTemp().type()) ctx.info[instr->definitions[0].tempId()].set_neg_abs(other, bit_size); - else if (abs && !neg && other.type() == RegType::vgpr) + else if (abs && !neg && other.type() == instr->definitions[0].getTemp().type()) ctx.info[instr->definitions[0].tempId()].set_abs(other, bit_size); - else if (!abs && neg && other.type() == RegType::vgpr) + else if (!abs && neg && other.type() == instr->definitions[0].getTemp().type()) ctx.info[instr->definitions[0].tempId()].set_neg(other, bit_size); else if (!abs && !neg) { if (denorm_mode == fp_denorm_keep || ctx.info[other.id()].is_canonicalized(bit_size))