nir/lower_bool_to_bitsize: Make all bN_csel sources match

Previously, we assumed that the selector for bcsel could have any bit
size, regardless of the bit sizes of the data, and we'd just fix it in the
back-end.  This works okay for scalars but falls over the moment we
vectorize because all our vector handling assumes bit sizes match.
Since matching bit sizes is what the hardware wants anyway, it's better
to do the right thing in NIR and hope copy-propagation can fold in
conversions if needed.

Unfortunately, copy-propagation isn't that smart yet, so this does hurt a bit:

    Instrs: 1193679 -> 1198086 (+0.37%); split: -0.06%, +0.43%
    CodeSize: 11915136 -> 11950592 (+0.30%); split: -0.05%, +0.34%
    Full: 160985 -> 160941 (-0.03%); split: -0.04%, +0.01%
    Estimated normalized CVT cycles: 4456.938557000181 -> 4480.876069000186 (+0.54%); split: -0.13%, +0.67%
    Estimated normalized SFU cycles: 6350.9375 -> 6392.21875 (+0.65%)
    Estimated normalized Load/Store cycles: 205773.0 -> 205795.0 (+0.01%)
    Maximum number of threads: 12864 -> 12863 (-0.01%)
    Number of spill instructions: 22487 -> 22489 (+0.01%)
    Number of fill instructions: 52179 -> 52219 (+0.08%)

Hurt shaders:

    google-meet-clvk/BgBlur
    google-meet-clvk/Relight
    parallel-rdp/small_subgroup
    parallel-rdp/small_uber_subgroup

The proper solution here is to teach copy-prop about this stuff so that
it can propagate swizzles into ALU ops when they're supported:
https://gitlab.freedesktop.org/panfrost/mesa/-/issues/265

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14945
Cc: mesa-stable
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
(cherry picked from commit 3fd471dca5)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40359>
This commit is contained in:
Faith Ekstrand 2026-03-09 15:09:23 -04:00 committed by Eric Engestrom
parent 740734ac72
commit 9c2b19219a
2 changed files with 32 additions and 21 deletions

View file

@ -494,7 +494,7 @@
"description": "nir/lower_bool_to_bitsize: Make all bN_csel sources match",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -57,6 +57,32 @@ get_bool_convert_opcode(uint32_t dst_bit_size)
}
}
/* Make source src_idx of alu have the requested bit size.
 *
 * Nothing is emitted when the source already has that bit size.  Otherwise
 * a conversion, whose opcode is picked by get_bool_convert_opcode(), is
 * built right before alu and the source is rewritten to read its result.
 */
static void
resize_bool_alu_source(nir_builder *b, nir_alu_instr *alu,
                       uint32_t src_idx, uint32_t bit_size)
{
   if (nir_src_bit_size(alu->src[src_idx].src) != bit_size) {
      /* New instructions are inserted immediately in front of the ALU op. */
      b->cursor = nir_before_instr(&alu->instr);

      const nir_op resize_op = get_bool_convert_opcode(bit_size);

      /* Hand the conversion the source with the component count and swizzle
       * of the original instruction, so that we don't unnecessarily create
       * a vectorized instruction.
       */
      nir_def *swizzled_src = nir_ssa_for_alu_src(b, alu, src_idx);
      nir_def *resized_src = nir_build_alu1(b, resize_op, swizzled_src);
      nir_src_rewrite(&alu->src[src_idx].src, resized_src);

      /* The conversion instruction has already taken care of the swizzle,
       * so put the identity swizzle back on the rewritten source.
       */
      unsigned comp = 0;
      while (comp < NIR_MAX_VEC_COMPONENTS) {
         alu->src[src_idx].swizzle[comp] = comp;
         comp++;
      }
   }
}
static void
make_sources_canonical(nir_builder *b, nir_alu_instr *alu, uint32_t start_idx)
{
@ -65,25 +91,8 @@ make_sources_canonical(nir_builder *b, nir_alu_instr *alu, uint32_t start_idx)
*/
const nir_op_info *op_info = &nir_op_infos[alu->op];
uint32_t bit_size = nir_src_bit_size(alu->src[start_idx].src);
for (uint32_t i = start_idx + 1; i < op_info->num_inputs; i++) {
if (nir_src_bit_size(alu->src[i].src) != bit_size) {
b->cursor = nir_before_instr(&alu->instr);
nir_op convert_op = get_bool_convert_opcode(bit_size);
/* Retain the number of components and swizzle of the original
* instruction so that we dont unnecessarily create a vectorized
* instruction.
*/
nir_def *new_src =
nir_build_alu1(b, convert_op, nir_ssa_for_alu_src(b, alu, i));
nir_src_rewrite(&alu->src[i].src, new_src);
/* The swizzle will have been handled by the conversion instruction
* so we can reset it back to the default
*/
for (unsigned j = 0; j < NIR_MAX_VEC_COMPONENTS; j++)
alu->src[i].swizzle[j] = j;
}
}
for (uint32_t i = start_idx + 1; i < op_info->num_inputs; i++)
resize_bool_alu_source(b, alu, i, bit_size);
}
static bool
@ -130,7 +139,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
case nir_op_bcsel:
/* bcsel may be choosing between boolean sources too */
if (alu->def.bit_size == 1)
make_sources_canonical(b, alu, 1);
make_sources_canonical(b, alu, 0);
else
resize_bool_alu_source(b, alu, 0, alu->def.bit_size);
break;
default: