From f4812dc11d48204091260914d481784eb55a75eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Tue, 31 Mar 2026 10:51:59 +0200
Subject: [PATCH] nir/opt_constant_folding: constant-fold op(bcsel(), #c) ->
 bcsel(.., #c1, #c2)

for all ALU instructions except fneg instead of using nir_opt_algebraic
for a small subset.

Totals from 17711 (8.49% of 208640) affected shaders: (Navi48)
MaxWaves: 364391 -> 364397 (+0.00%); split: +0.01%, -0.01%
Instrs: 33873994 -> 33780398 (-0.28%); split: -0.31%, +0.03%
CodeSize: 198627596 -> 198259724 (-0.19%); split: -0.23%, +0.05%
VGPRs: 1435516 -> 1435144 (-0.03%); split: -0.04%, +0.02%
SpillSGPRs: 652827 -> 654577 (+0.27%); split: -0.00%, +0.27%
SpillVGPRs: 594840 -> 593598 (-0.21%); split: -0.28%, +0.07%
Scratch: 31791360 -> 31543552 (-0.78%)
Latency: 417824569 -> 415881858 (-0.46%); split: -0.48%, +0.02%
InvThroughput: 80376232 -> 80307996 (-0.08%); split: -0.10%, +0.01%
VClause: 557238 -> 554770 (-0.44%); split: -0.50%, +0.06%
SClause: 688297 -> 688125 (-0.02%); split: -0.04%, +0.02%
Copies: 3571756 -> 3566704 (-0.14%); split: -0.44%, +0.29%
Branches: 628710 -> 628576 (-0.02%); split: -0.07%, +0.05%
PreSGPRs: 1100316 -> 1103478 (+0.29%); split: -0.02%, +0.30%
PreVGPRs: 1132139 -> 1128765 (-0.30%); split: -0.30%, +0.00%
VALU: 18944830 -> 18912030 (-0.17%); split: -0.20%, +0.03%
SALU: 4363054 -> 4342748 (-0.47%); split: -0.57%, +0.10%
VMEM: 1894420 -> 1891754 (-0.14%); split: -0.19%, +0.05%
SMEM: 1073860 -> 1073741 (-0.01%); split: -0.01%, +0.00%
VOPD: 1734659 -> 1735718 (+0.06%); split: +0.20%, -0.14%

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40848>
---
 src/compiler/nir/nir_opt_constant_folding.c | 81 +++++++++++++++++++--
 src/compiler/nir/nir_search.c               | 10 +++
 2 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 134ec171fd8..8526e175ce9 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -55,10 +55,41 @@ const_value_for_alu(nir_builder *b, nir_alu_instr *alu, unsigned bit_size,
                         dest);
 }
 
+static bool
+is_bcsel_with_two_constants(nir_alu_instr *bcsel)
+{
+   /* Only a scalar bcsel whose condition is itself a plain scalar qualifies. */
+   if (!bcsel || bcsel->op != nir_op_bcsel || bcsel->def.num_components != 1)
+      return false;
+   if (bcsel->src[0].swizzle[0] != 0 || bcsel->src[0].src.ssa->num_components != 1)
+      return false;
+   return nir_src_is_const(bcsel->src[1].src) && nir_src_is_const(bcsel->src[2].src);
+}
+
+static bool
+should_fold_bcsel(nir_alu_instr *alu)
+{
+   /* Results wider than 32 bit are left alone: selecting between them
+    * commonly requires two instructions.
+    */
+   const bool too_wide = alu->def.bit_size > 32;
+
+   /* Leave vecs and movs to nir_lower_load_const_to_scalar. */
+   const bool vec_or_mov = nir_op_is_vec_or_mov(alu->op);
+
+   /* fneg is exempt because in many cases it can be folded with the
+    * next instruction.
+    */
+   const bool is_fneg = alu->op == nir_op_fneg;
+
+   return !(too_wide || vec_or_mov || is_fneg);
+}
+
 nir_def *
 nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
 {
    nir_const_value src[NIR_ALU_MAX_INPUTS][NIR_MAX_VEC_COMPONENTS];
+   nir_def *bcsel = NULL;
 
    /* In the case that any outputs/inputs have unsized types, then we need to
     * guess the bit-size. In this case, the validator ensures that all
@@ -79,19 +110,57 @@ nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
          bit_size = alu->src[i].src.ssa->bit_size;
 
       nir_load_const_instr *load_const = nir_src_as_load_const(alu->src[i].src);
-      if (!load_const)
-         return NULL;
 
-      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
-           j++) {
-         src[i][j] = load_const->value[alu->src[i].swizzle[j]];
+      if (load_const) {
+         for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
+            src[i][j] = load_const->value[alu->src[i].swizzle[j]];
+         continue;
+      }
+
+      /* A non-constant source is only foldable as a bcsel of two constants. */
+      nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
+      if (should_fold_bcsel(alu) && is_bcsel_with_two_constants(bcsel_alu)) {
+         /* If there are multiple bcsel sources, they must share one condition. */
+         if (bcsel && bcsel_alu->src[0].src.ssa != bcsel)
+            return NULL;
+
+         bcsel = bcsel_alu->src[0].src.ssa;
+
+         /* Seed the fold with the then-value (src[1]) of the bcsel. */
+         load_const = nir_src_as_load_const(bcsel_alu->src[1].src);
+         for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
+            src[i][j] = load_const->value[bcsel_alu->src[1].swizzle[alu->src[i].swizzle[j]]];
+      } else {
+         return NULL;
       }
    }
 
    if (bit_size == 0)
       bit_size = 32;
 
-   return const_value_for_alu(b, alu, bit_size, src);
+   /* No bcsel condition was recorded: every source is a load_const, fold directly. */
+   if (!bcsel)
+      return const_value_for_alu(b, alu, bit_size, src);
+
+   /* At least one source is a bcsel with two constants. Fold the ALU twice
+    * (src[] currently holds the then-values) and create a new bcsel on the
+    * shared condition, selecting between the two folded values.
+    */
+   nir_def *then_const = const_value_for_alu(b, alu, bit_size, src);
+
+   /* Overwrite each bcsel source with its else-value (src[2]) and fold again. */
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+      nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
+      if (!bcsel_alu)
+         continue; /* not an ALU source: keeps the value already in src[i] */
+
+      nir_load_const_instr *load_const = nir_src_as_load_const(bcsel_alu->src[2].src);
+      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++) {
+         src[i][j] = load_const->value[bcsel_alu->src[2].swizzle[alu->src[i].swizzle[j]]];
+      }
+   }
+   nir_def *else_const = const_value_for_alu(b, alu, bit_size, src);
+
+   return nir_bcsel(b, bcsel, then_const, else_const);
 }
 
 static nir_const_value *
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 403e5fc08ae..bddfd7f365d 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 #include "util/half_float.h"
 #include "nir_builder.h"
+#include "nir_opcodes.h"
 #include "nir_worklist.h"
 
 /* This should be the same as nir_search_max_comm_ops in nir_algebraic.py. */
@@ -441,6 +442,15 @@ construct_value(nir_builder *build,
       if (const_expr) {
          nir_instr_free(&alu->instr);
          def = const_expr;
+         if (nir_def_is_alu(def)) {
+            /* The instruction got folded into bcsel of two constants. */
+            nir_alu_instr *bcsel = nir_def_as_alu(def);
+            assert(bcsel->op == nir_op_bcsel);
+            util_dynarray_append_typed(state->states, uint16_t, 0);
+            nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[1].src), state->states, state->pass_op_table);
+            util_dynarray_append_typed(state->states, uint16_t, 0);
+            nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[2].src), state->states, state->pass_op_table);
+         }
       } else {
          nir_builder_instr_insert(build, &alu->instr);
       }