aco/optimizer: propagate salu fneg

Foz-DB Navi48:
Totals from 23796 (24.37% of 97637) affected shaders:
MaxWaves: 638922 -> 638898 (-0.00%)
Instrs: 32968990 -> 32880147 (-0.27%); split: -0.28%, +0.01%
CodeSize: 174252352 -> 173922400 (-0.19%); split: -0.20%, +0.01%
VGPRs: 1396472 -> 1396592 (+0.01%)
SpillSGPRs: 63672 -> 63599 (-0.11%)
Latency: 201025393 -> 200966204 (-0.03%); split: -0.05%, +0.02%
InvThroughput: 37429702 -> 37411026 (-0.05%); split: -0.06%, +0.01%
VClause: 534241 -> 534115 (-0.02%); split: -0.05%, +0.02%
SClause: 831765 -> 831559 (-0.02%); split: -0.07%, +0.05%
Copies: 2404134 -> 2400539 (-0.15%); split: -0.29%, +0.14%
Branches: 728518 -> 728503 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 1337403 -> 1336846 (-0.04%); split: -0.04%, +0.00%
PreVGPRs: 1017490 -> 1017521 (+0.00%); split: -0.00%, +0.00%
VALU: 18319620 -> 18318960 (-0.00%); split: -0.01%, +0.00%
SALU: 5069557 -> 5001384 (-1.34%); split: -1.38%, +0.03%
VOPD: 80235 -> 80172 (-0.08%); split: +0.13%, -0.21%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38723>
This commit is contained in:
Georg Lehmann 2025-11-29 10:40:56 +01:00 committed by Marge Bot
parent 8b1340a52c
commit 04037c7af3

View file

@@ -2765,13 +2765,15 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 if (!ctx.program->needs_wqm)
 ctx.info[instr->definitions[0].tempId()].set_constant(0u);
 break;
+case aco_opcode::s_mul_f16:
+case aco_opcode::s_mul_f32:
 case aco_opcode::v_mul_f16:
 case aco_opcode::v_mul_f32:
 case aco_opcode::v_mul_legacy_f32:
 case aco_opcode::v_mul_f64:
 case aco_opcode::v_mul_f64_e64: {
 bool uses_mods = instr->usesModifiers();
-bool fp16 = instr->opcode == aco_opcode::v_mul_f16;
+bool fp16 = instr->opcode == aco_opcode::v_mul_f16 || instr->opcode == aco_opcode::s_mul_f16;
 bool fp64 =
 instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64;
 unsigned bit_size = fp16 ? 16 : (fp64 ? 64 : 32);
@@ -2783,22 +2785,27 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 double constant = extract_float(instr->operands[!i].constantValue64(), bit_size);
-if (!instr->isDPP() && !instr->isSDWA() && !instr->valu().opsel && fabs(constant) == 1.0) {
-bool neg1 = constant == -1.0;
+if (!instr->isDPP() && !instr->isSDWA() && (!instr->isVALU() || !instr->valu().opsel) &&
+fabs(constant) == 1.0) {
+bool neg = constant == -1.0;
+bool abs = false;
-VALU_instruction* valu = &instr->valu();
-if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp)
-continue;
+if (instr->isVALU()) {
+VALU_instruction* valu = &instr->valu();
+if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp)
+continue;
+abs = valu->abs[i];
+neg ^= valu->neg[i];
+}
-bool abs = valu->abs[i];
-bool neg = neg1 ^ valu->neg[i];
 Temp other = instr->operands[i].getTemp();
-if (abs && neg && other.type() == RegType::vgpr)
+if (abs && neg && other.type() == instr->definitions[0].getTemp().type())
 ctx.info[instr->definitions[0].tempId()].set_neg_abs(other, bit_size);
-else if (abs && !neg && other.type() == RegType::vgpr)
+else if (abs && !neg && other.type() == instr->definitions[0].getTemp().type())
 ctx.info[instr->definitions[0].tempId()].set_abs(other, bit_size);
-else if (!abs && neg && other.type() == RegType::vgpr)
+else if (!abs && neg && other.type() == instr->definitions[0].getTemp().type())
 ctx.info[instr->definitions[0].tempId()].set_neg(other, bit_size);
 else if (!abs && !neg) {
 if (denorm_mode == fp_denorm_keep || ctx.info[other.id()].is_canonicalized(bit_size))