From 643dd510d4992f287a11ae3772a9da2a9f2be048 Mon Sep 17 00:00:00 2001
From: Georg Lehmann <dadschoorse@gmail.com>
Date: Fri, 13 Mar 2026 14:21:26 +0100
Subject: [PATCH] nir/opt_algebraic: optimize b2f(a) * b

When the multiplication is only used by fadd, it's not a clear win
because of potential fma fusion.

Totals from 8015 (6.99% of 114655) affected shaders:
MaxWaves: 199394 -> 199466 (+0.04%); split: +0.04%, -0.01%
Instrs: 17461518 -> 17451076 (-0.06%); split: -0.10%, +0.04%
CodeSize: 94779552 -> 94769828 (-0.01%); split: -0.07%, +0.06%
VGPRs: 526012 -> 525532 (-0.09%); split: -0.10%, +0.01%
SpillSGPRs: 12466 -> 12517 (+0.41%); split: -0.09%, +0.50%
Latency: 191274766 -> 191297394 (+0.01%); split: -0.03%, +0.04%
InvThroughput: 31465968 -> 31456785 (-0.03%); split: -0.07%, +0.04%
VClause: 312081 -> 312073 (-0.00%); split: -0.10%, +0.09%
SClause: 366914 -> 366906 (-0.00%); split: -0.02%, +0.01%
Copies: 1222482 -> 1221933 (-0.04%); split: -0.20%, +0.15%
Branches: 376651 -> 376577 (-0.02%); split: -0.03%, +0.01%
PreSGPRs: 442974 -> 443240 (+0.06%); split: -0.01%, +0.07%
PreVGPRs: 415964 -> 415668 (-0.07%); split: -0.09%, +0.02%
VALU: 9403517 -> 9393916 (-0.10%); split: -0.12%, +0.02%
SALU: 2799420 -> 2800430 (+0.04%); split: -0.13%, +0.16%
VOPD: 472826 -> 472347 (-0.10%); split: +0.09%, -0.19%

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40399>
---
 src/compiler/nir/nir_opt_algebraic.py | 13 +++++++++++++
 src/compiler/nir/nir_search_helpers.h |  6 ++++++
 2 files changed, 19 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 2b2ba38198d..7bb4e23348a 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1623,6 +1623,19 @@ optimizations.extend([
    (('iand', 'a@bool16', 1.0), ('b2f', a)),
    (('iand', 'a@bool32', 1.0), ('b2f', a)),
 
+   # For this optimization, there are a few things to consider:
+   # The replacement must flush denorms; fcanonicalize/fneg take care of that.
+   # For fmul, if b is not finite, b2f(False) * b would need to be NaN.
+   # If b is negative, b2f(False) * b would be -0.0, not +0.0, hence the nsz.
+   # For fmulz, if b is -0.0, b2f(True) * b would need to be +0.0, not b.
+   # So even there nsz is needed.
+   # When the multiplication is only used by fadd, it's not a clear win
+   # because of potential fma fusion.
+   (('fmul(nsz,is_not_only_used_by_fadd)',  ('b2f', 'a@1'), 'b(is_finite)'),           ('bcsel', a, ('fcanonicalize', b), 0.0)),
+   (('fmul(nsz,is_not_only_used_by_fadd)',  ('fneg', ('b2f', 'a@1')), 'b(is_finite)'), ('bcsel', a, ('fneg', b), 0.0)),
+   (('fmulz(nsz,is_not_only_used_by_fadd)', ('b2f', 'a@1'), b),           ('bcsel', a, ('fcanonicalize', b), 0.0)),
+   (('fmulz(nsz,is_not_only_used_by_fadd)', ('fneg', ('b2f', 'a@1')), b), ('bcsel', a, ('fneg', b), 0.0)),
+
    # Comparison with the same args.  Note that these are only done for the
    # float versions when the source must be a number.  Generally, NaN cmp NaN
    # produces the opposite result of X cmp X.  flt is the outlier.  NaN < NaN
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 454e62b934e..be838f4a35b 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -592,6 +592,12 @@ is_only_used_by_fadd(const nir_alu_instr *instr)
    return true;
 }
 
+static inline bool
+is_not_only_used_by_fadd(const nir_alu_instr *instr)
+{
+   return !is_only_used_by_fadd(instr); /* plain negation, so search patterns can name the inverse condition */
+}
+
 static inline bool
 is_only_used_by_alu_op(const nir_alu_instr *instr, nir_op op)
 {