From 04037c7af34a0f68458eb74db0909d65f6f45810 Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Sat, 29 Nov 2025 10:40:56 +0100
Subject: [PATCH] aco/optimizer: propagate salu fneg

Foz-DB Navi48:
Totals from 23796 (24.37% of 97637) affected shaders:
MaxWaves: 638922 -> 638898 (-0.00%)
Instrs: 32968990 -> 32880147 (-0.27%); split: -0.28%, +0.01%
CodeSize: 174252352 -> 173922400 (-0.19%); split: -0.20%, +0.01%
VGPRs: 1396472 -> 1396592 (+0.01%)
SpillSGPRs: 63672 -> 63599 (-0.11%)
Latency: 201025393 -> 200966204 (-0.03%); split: -0.05%, +0.02%
InvThroughput: 37429702 -> 37411026 (-0.05%); split: -0.06%, +0.01%
VClause: 534241 -> 534115 (-0.02%); split: -0.05%, +0.02%
SClause: 831765 -> 831559 (-0.02%); split: -0.07%, +0.05%
Copies: 2404134 -> 2400539 (-0.15%); split: -0.29%, +0.14%
Branches: 728518 -> 728503 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 1337403 -> 1336846 (-0.04%); split: -0.04%, +0.00%
PreVGPRs: 1017490 -> 1017521 (+0.00%); split: -0.00%, +0.00%
VALU: 18319620 -> 18318960 (-0.00%); split: -0.01%, +0.00%
SALU: 5069557 -> 5001384 (-1.34%); split: -1.38%, +0.03%
VOPD: 80235 -> 80172 (-0.08%); split: +0.13%, -0.21%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38723>
---
 src/amd/compiler/aco_optimizer.cpp | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index f801bce50c9..b1b8f054658 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2765,13 +2765,15 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (!ctx.program->needs_wqm)
          ctx.info[instr->definitions[0].tempId()].set_constant(0u);
       break;
+   case aco_opcode::s_mul_f16:
+   case aco_opcode::s_mul_f32:
    case aco_opcode::v_mul_f16:
    case aco_opcode::v_mul_f32:
    case aco_opcode::v_mul_legacy_f32:
    case aco_opcode::v_mul_f64:
    case aco_opcode::v_mul_f64_e64: {
       bool uses_mods = instr->usesModifiers();
-      bool fp16 = instr->opcode == aco_opcode::v_mul_f16;
+      bool fp16 = instr->opcode == aco_opcode::v_mul_f16 || instr->opcode == aco_opcode::s_mul_f16;
       bool fp64 =
          instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64;
       unsigned bit_size = fp16 ? 16 : (fp64 ? 64 : 32);
@@ -2783,22 +2785,27 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
          double constant = extract_float(instr->operands[!i].constantValue64(), bit_size);
 
-         if (!instr->isDPP() && !instr->isSDWA() && !instr->valu().opsel && fabs(constant) == 1.0) {
-            bool neg1 = constant == -1.0;
+         if (!instr->isDPP() && !instr->isSDWA() && (!instr->isVALU() || !instr->valu().opsel) &&
+             fabs(constant) == 1.0) {
+            bool neg = constant == -1.0;
+            bool abs = false;
 
-            VALU_instruction* valu = &instr->valu();
-            if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp)
-               continue;
+            if (instr->isVALU()) {
+               VALU_instruction* valu = &instr->valu();
+               if (valu->abs[!i] || valu->neg[!i] || valu->omod || valu->clamp)
+                  continue;
+
+               abs = valu->abs[i];
+               neg ^= valu->neg[i];
+            }
 
-            bool abs = valu->abs[i];
-            bool neg = neg1 ^ valu->neg[i];
             Temp other = instr->operands[i].getTemp();
 
-            if (abs && neg && other.type() == RegType::vgpr)
+            if (abs && neg && other.type() == instr->definitions[0].getTemp().type())
                ctx.info[instr->definitions[0].tempId()].set_neg_abs(other, bit_size);
-            else if (abs && !neg && other.type() == RegType::vgpr)
+            else if (abs && !neg && other.type() == instr->definitions[0].getTemp().type())
                ctx.info[instr->definitions[0].tempId()].set_abs(other, bit_size);
-            else if (!abs && neg && other.type() == RegType::vgpr)
+            else if (!abs && neg && other.type() == instr->definitions[0].getTemp().type())
                ctx.info[instr->definitions[0].tempId()].set_neg(other, bit_size);
             else if (!abs && !neg) {
                if (denorm_mode == fp_denorm_keep || ctx.info[other.id()].is_canonicalized(bit_size))