nir/opt_constant_folding: constant-fold op(bcsel(), #c) -> bcsel(.., #c1, #c2)

Do this for all ALU instructions except fneg, instead of using
nir_opt_algebraic for only a small subset.

Totals from 17711 (8.49% of 208640) affected shaders: (Navi48)
MaxWaves: 364391 -> 364397 (+0.00%); split: +0.01%, -0.01%
Instrs: 33873994 -> 33780398 (-0.28%); split: -0.31%, +0.03%
CodeSize: 198627596 -> 198259724 (-0.19%); split: -0.23%, +0.05%
VGPRs: 1435516 -> 1435144 (-0.03%); split: -0.04%, +0.02%
SpillSGPRs: 652827 -> 654577 (+0.27%); split: -0.00%, +0.27%
SpillVGPRs: 594840 -> 593598 (-0.21%); split: -0.28%, +0.07%
Scratch: 31791360 -> 31543552 (-0.78%)
Latency: 417824569 -> 415881858 (-0.46%); split: -0.48%, +0.02%
InvThroughput: 80376232 -> 80307996 (-0.08%); split: -0.10%, +0.01%
VClause: 557238 -> 554770 (-0.44%); split: -0.50%, +0.06%
SClause: 688297 -> 688125 (-0.02%); split: -0.04%, +0.02%
Copies: 3571756 -> 3566704 (-0.14%); split: -0.44%, +0.29%
Branches: 628710 -> 628576 (-0.02%); split: -0.07%, +0.05%
PreSGPRs: 1100316 -> 1103478 (+0.29%); split: -0.02%, +0.30%
PreVGPRs: 1132139 -> 1128765 (-0.30%); split: -0.30%, +0.00%
VALU: 18944830 -> 18912030 (-0.17%); split: -0.20%, +0.03%
SALU: 4363054 -> 4342748 (-0.47%); split: -0.57%, +0.10%
VMEM: 1894420 -> 1891754 (-0.14%); split: -0.19%, +0.05%
SMEM: 1073860 -> 1073741 (-0.01%); split: -0.01%, +0.00%
VOPD: 1734659 -> 1735718 (+0.06%); split: +0.20%, -0.14%

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40848>
This commit is contained in:
Daniel Schürmann 2026-03-31 10:51:59 +02:00 committed by Marge Bot
parent 8b1c60add4
commit f4812dc11d
2 changed files with 85 additions and 6 deletions

View file

@ -55,10 +55,41 @@ const_value_for_alu(nir_builder *b, nir_alu_instr *alu, unsigned bit_size,
dest); dest);
} }
/* Returns true when `bcsel` is a non-NULL, single-component nir_op_bcsel whose
 * condition is a single-component def read through swizzle 0 and whose two
 * value sources are both constants.
 */
static bool
is_bcsel_with_two_constants(nir_alu_instr *bcsel)
{
   /* Callers may pass the result of a failed ALU lookup. */
   if (!bcsel || bcsel->op != nir_op_bcsel)
      return false;

   /* Only single-component results are accepted. */
   if (bcsel->def.num_components != 1)
      return false;

   /* The condition has to be a single-component def, read unswizzled. */
   if (bcsel->src[0].swizzle[0] != 0 ||
       bcsel->src[0].src.ssa->num_components != 1)
      return false;

   /* Both selectable values have to be constants. */
   return nir_src_is_const(bcsel->src[1].src) &&
          nir_src_is_const(bcsel->src[2].src);
}
/* Decides whether `alu` is a candidate for being folded through a bcsel of
 * two constants. Returns false for results wider than 32 bits, for vec/mov
 * opcodes and for fneg; true for every other ALU instruction.
 */
static bool
should_fold_bcsel(nir_alu_instr *alu)
{
   const nir_op op = alu->op;

   /* A bcsel on a result wider than 32 bit commonly requires two
    * instructions, so folding is not considered worthwhile there.
    */
   bool foldable = alu->def.bit_size <= 32;

   /* Leave vec/mov alone so we don't fight with
    * nir_lower_load_const_to_scalar.
    */
   foldable = foldable && !nir_op_is_vec_or_mov(op);

   /* fneg is excluded because in many cases it can be folded with the
    * next instruction.
    */
   foldable = foldable && op != nir_op_fneg;

   return foldable;
}
nir_def * nir_def *
nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu) nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
{ {
nir_const_value src[NIR_ALU_MAX_INPUTS][NIR_MAX_VEC_COMPONENTS]; nir_const_value src[NIR_ALU_MAX_INPUTS][NIR_MAX_VEC_COMPONENTS];
nir_def *bcsel = NULL;
/* In the case that any outputs/inputs have unsized types, then we need to /* In the case that any outputs/inputs have unsized types, then we need to
* guess the bit-size. In this case, the validator ensures that all * guess the bit-size. In this case, the validator ensures that all
@ -79,19 +110,57 @@ nir_try_constant_fold_alu(nir_builder *b, nir_alu_instr *alu)
bit_size = alu->src[i].src.ssa->bit_size; bit_size = alu->src[i].src.ssa->bit_size;
nir_load_const_instr *load_const = nir_src_as_load_const(alu->src[i].src); nir_load_const_instr *load_const = nir_src_as_load_const(alu->src[i].src);
if (!load_const)
return NULL;
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); if (load_const) {
j++) { for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
src[i][j] = load_const->value[alu->src[i].swizzle[j]]; src[i][j] = load_const->value[alu->src[i].swizzle[j]];
continue;
}
/* Check if the source is a bcsel with two constants. */
nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
if (should_fold_bcsel(alu) && is_bcsel_with_two_constants(bcsel_alu)) {
/* If there are multiple bcsel sources, they must use the same condition. */
if (bcsel && bcsel_alu->src[0].src.ssa != bcsel)
return false;
bcsel = bcsel_alu->src[0].src.ssa;
/* Use first bcsel constant. */
load_const = nir_src_as_load_const(bcsel_alu->src[1].src);
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++)
src[i][j] = load_const->value[bcsel_alu->src[1].swizzle[alu->src[i].swizzle[j]]];
} else {
return NULL;
} }
} }
if (bit_size == 0) if (bit_size == 0)
bit_size = 32; bit_size = 32;
return const_value_for_alu(b, alu, bit_size, src); /* If all sources are constant, we can fold the ALU. */
if (!bcsel)
return const_value_for_alu(b, alu, bit_size, src);
/* At least one source is a bcsel with two constants. Fold the ALU twice
* and create a new bcsel, selecting between the folded values.
*/
nir_def *then_const = const_value_for_alu(b, alu, bit_size, src);
/* Create second bcsel constant. */
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
nir_alu_instr *bcsel_alu = nir_src_as_alu(alu->src[i].src);
if (!bcsel_alu)
continue;
nir_load_const_instr *load_const = nir_src_as_load_const(bcsel_alu->src[2].src);
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i); j++) {
src[i][j] = load_const->value[bcsel_alu->src[2].swizzle[alu->src[i].swizzle[j]]];
}
}
nir_def *else_const = const_value_for_alu(b, alu, bit_size, src);
return nir_bcsel(b, bcsel, then_const, else_const);
} }
static nir_const_value * static nir_const_value *

View file

@ -25,6 +25,7 @@
#include <inttypes.h> #include <inttypes.h>
#include "util/half_float.h" #include "util/half_float.h"
#include "nir_builder.h" #include "nir_builder.h"
#include "nir_opcodes.h"
#include "nir_worklist.h" #include "nir_worklist.h"
/* This should be the same as nir_search_max_comm_ops in nir_algebraic.py. */ /* This should be the same as nir_search_max_comm_ops in nir_algebraic.py. */
@ -441,6 +442,15 @@ construct_value(nir_builder *build,
if (const_expr) { if (const_expr) {
nir_instr_free(&alu->instr); nir_instr_free(&alu->instr);
def = const_expr; def = const_expr;
if (nir_def_is_alu(def)) {
/* The instruction got folded into bcsel of two constants. */
nir_alu_instr *bcsel = nir_def_as_alu(def);
assert(bcsel->op == nir_op_bcsel);
util_dynarray_append_typed(state->states, uint16_t, 0);
nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[1].src), state->states, state->pass_op_table);
util_dynarray_append_typed(state->states, uint16_t, 0);
nir_algebraic_automaton(nir_src_parent_instr(&bcsel->src[2].src), state->states, state->pass_op_table);
}
} else { } else {
nir_builder_instr_insert(build, &alu->instr); nir_builder_instr_insert(build, &alu->instr);
} }