nir/opt_intrinsics: fix inclusive scan rewrite with multiple uses

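Modifying the use list of the scan's result while iterating over it is a
footgun, so create a new inclusive scan instruction and remove the old
exclusive scan instead of rewriting the intrinsic in place.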

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13364
Fixes: 5c70a55bf3 ("nir/opt_intrinsics: optimize (exclusive_scan(op, a) op a) to inclusive scan")

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35577>
This commit is contained in:
Georg Lehmann 2025-06-17 16:10:07 +02:00 committed by Marge Bot
parent ab5605aab3
commit e9c886c331

View file

@ -260,18 +260,20 @@ opt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu,
}
static bool
try_opt_exclusive_scan_to_inclusive(nir_intrinsic_instr *intrin)
try_opt_exclusive_scan_to_inclusive(nir_builder *b, nir_intrinsic_instr *intrin)
{
if (intrin->def.num_components != 1)
return false;
nir_op reduction_op = nir_intrinsic_reduction_op(intrin);
nir_foreach_use_including_if(src, &intrin->def) {
if (nir_src_is_if(src) || nir_src_parent_instr(src)->type != nir_instr_type_alu)
return false;
nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
if (alu->op != (nir_op)nir_intrinsic_reduction_op(intrin))
if (alu->op != reduction_op)
return false;
/* Don't reassociate exact float operations. */
@ -309,14 +311,16 @@ try_opt_exclusive_scan_to_inclusive(nir_intrinsic_instr *intrin)
}
/* Convert to inclusive scan. */
intrin->intrinsic = nir_intrinsic_inclusive_scan;
nir_def *incl_scan = nir_inclusive_scan(b, intrin->src[0].ssa, .reduction_op = reduction_op);
nir_foreach_use_including_if_safe(src, &intrin->def) {
/* Remove alu. */
nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
nir_def_replace(&alu->def, &intrin->def);
nir_def_replace(&alu->def, incl_scan);
}
nir_instr_remove(&intrin->instr);
return true;
}
@ -374,7 +378,7 @@ opt_intrinsics_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
return progress;
}
case nir_intrinsic_exclusive_scan:
return try_opt_exclusive_scan_to_inclusive(intrin);
return try_opt_exclusive_scan_to_inclusive(b, intrin);
default:
return false;
}