mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 20:18:12 +02:00
nir/lower_bool_to_bitsize: Make all bN_csel sources match
Previously, we assumed that the selector for bcsel could be whatever,
regardless of the bit sizes of the data and we'd just fix it in the
back-end. This works okay for scalars but falls over the moment we
vectorize because all our vector handling assumes bit sizes match.
Since matching bit sizes is what the hardware wants anyway, it's better
to do the right thing in NIR and hope copy-propagation can fold in
conversions if needed.
Unfortunately, copy prop isn't that smart yet so this does hurt a bit:
Instrs: 1193679 -> 1198086 (+0.37%); split: -0.06%, +0.43%
CodeSize: 11915136 -> 11950592 (+0.30%); split: -0.05%, +0.34%
Full: 160985 -> 160941 (-0.03%); split: -0.04%, +0.01%
Estimated normalized CVT cycles: 4456.938557000181 -> 4480.876069000186 (+0.54%); split: -0.13%, +0.67%
Estimated normalized SFU cycles: 6350.9375 -> 6392.21875 (+0.65%)
Estimated normalized Load/Store cycles: 205773.0 -> 205795.0 (+0.01%)
Maximum number of threads: 12864 -> 12863 (-0.01%)
Number of spill instructions: 22487 -> 22489 (+0.01%)
Number of fill instructions: 52179 -> 52219 (+0.08%)
Hurt shaders:
google-meet-clvk/BgBlur
google-meet-clvk/Relight
parallel-rdp/small_subgroup
parallel-rdp/small_uber_subgroup
The proper solution here is to teach copy-prop about this stuff so that
it can propagate swizzles into ALU ops when they're supported:
https://gitlab.freedesktop.org/panfrost/mesa/-/issues/265
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14945
Cc: mesa-stable
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
(cherry picked from commit 3fd471dca5)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40359>
This commit is contained in:
parent
740734ac72
commit
9c2b19219a
2 changed files with 32 additions and 21 deletions
|
|
@ -494,7 +494,7 @@
|
|||
"description": "nir/lower_bool_to_bitsize: Make all bN_csel sources match",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -57,6 +57,32 @@ get_bool_convert_opcode(uint32_t dst_bit_size)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
resize_bool_alu_source(nir_builder *b, nir_alu_instr *alu,
|
||||
uint32_t src_idx, uint32_t bit_size)
|
||||
{
|
||||
if (nir_src_bit_size(alu->src[src_idx].src) == bit_size)
|
||||
return;
|
||||
|
||||
b->cursor = nir_before_instr(&alu->instr);
|
||||
nir_op convert_op = get_bool_convert_opcode(bit_size);
|
||||
|
||||
/* Retain the number of components and swizzle of the original
|
||||
* instruction so that we don’t unnecessarily create a vectorized
|
||||
* instruction.
|
||||
*/
|
||||
nir_def *new_src =
|
||||
nir_build_alu1(b, convert_op, nir_ssa_for_alu_src(b, alu, src_idx));
|
||||
|
||||
nir_src_rewrite(&alu->src[src_idx].src, new_src);
|
||||
|
||||
/* The swizzle will have been handled by the conversion instruction
|
||||
* so we can reset it back to the default
|
||||
*/
|
||||
for (unsigned j = 0; j < NIR_MAX_VEC_COMPONENTS; j++)
|
||||
alu->src[src_idx].swizzle[j] = j;
|
||||
}
|
||||
|
||||
static void
|
||||
make_sources_canonical(nir_builder *b, nir_alu_instr *alu, uint32_t start_idx)
|
||||
{
|
||||
|
|
@ -65,25 +91,8 @@ make_sources_canonical(nir_builder *b, nir_alu_instr *alu, uint32_t start_idx)
|
|||
*/
|
||||
const nir_op_info *op_info = &nir_op_infos[alu->op];
|
||||
uint32_t bit_size = nir_src_bit_size(alu->src[start_idx].src);
|
||||
for (uint32_t i = start_idx + 1; i < op_info->num_inputs; i++) {
|
||||
if (nir_src_bit_size(alu->src[i].src) != bit_size) {
|
||||
b->cursor = nir_before_instr(&alu->instr);
|
||||
nir_op convert_op = get_bool_convert_opcode(bit_size);
|
||||
/* Retain the number of components and swizzle of the original
|
||||
* instruction so that we don’t unnecessarily create a vectorized
|
||||
* instruction.
|
||||
*/
|
||||
nir_def *new_src =
|
||||
nir_build_alu1(b, convert_op, nir_ssa_for_alu_src(b, alu, i));
|
||||
|
||||
nir_src_rewrite(&alu->src[i].src, new_src);
|
||||
/* The swizzle will have been handled by the conversion instruction
|
||||
* so we can reset it back to the default
|
||||
*/
|
||||
for (unsigned j = 0; j < NIR_MAX_VEC_COMPONENTS; j++)
|
||||
alu->src[i].swizzle[j] = j;
|
||||
}
|
||||
}
|
||||
for (uint32_t i = start_idx + 1; i < op_info->num_inputs; i++)
|
||||
resize_bool_alu_source(b, alu, i, bit_size);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -130,7 +139,9 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
|
|||
case nir_op_bcsel:
|
||||
/* bcsel may be choosing between boolean sources too */
|
||||
if (alu->def.bit_size == 1)
|
||||
make_sources_canonical(b, alu, 1);
|
||||
make_sources_canonical(b, alu, 0);
|
||||
else
|
||||
resize_bool_alu_source(b, alu, 0, alu->def.bit_size);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue