nir/search: never insert movs for alu uses

This means we respect the pattern order better because
simple replacements like bcsel(False, a, b) -> b no longer
insert movs that can block more specialized patterns.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40399>
This commit is contained in:
Georg Lehmann 2026-03-14 08:18:23 +01:00 committed by Marge Bot
parent 1626df7a90
commit 3ad142d4d7
4 changed files with 48 additions and 20 deletions

View file

@ -408,6 +408,36 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components)
return &instr->def;
}
nir_def *
nir_def_rewrite_uses_with_alu_src(nir_builder *build, nir_def *def,
nir_alu_src src, unsigned num_components)
{
if (nir_alu_src_is_trivial_ssa(&src, num_components)) {
nir_def_rewrite_uses(def, src.src.ssa);
return NULL;
}
nir_def *mov = NULL;
nir_foreach_use_including_if_safe(use, def) {
if (nir_src_is_if(use) || nir_src_parent_instr(use)->type != nir_instr_type_alu) {
if (!mov)
mov = nir_mov_alu(build, src, num_components);
nir_src_rewrite(use, mov);
} else {
nir_alu_src *alu_src = container_of(use, nir_alu_src, src);
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
alu_src->swizzle[i] = src.swizzle[alu_src->swizzle[i]];
nir_src_rewrite(use, src.src.ssa);
}
}
return mov;
}
/**
* Get nir_def for an alu src, respecting the nir_alu_src's swizzle.
*/

View file

@ -728,6 +728,12 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
return &mov->def;
}
/* Tries to avoid inserting a mov for the replacement,
* but if it has to insert one to handle non-alu, it's return instead of NULL.
*/
nir_def *
nir_def_rewrite_uses_with_alu_src(nir_builder *build, nir_def *def, nir_alu_src src, unsigned num_components);
/**
* Construct a mov that reswizzles the source's components.
*/

View file

@ -673,7 +673,7 @@ nir_algebraic_update_automaton(nir_instr *new_instr,
nir_instr_worklist_fini(&automaton_worklist);
}
static nir_def *
static bool
nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
const nir_search_state *search_state,
struct util_dynarray *states,
@ -710,7 +710,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
}
}
if (!found)
return NULL;
return false;
#if 0
fprintf(stderr, "matched: ");
@ -766,18 +766,16 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
/* Note that NIR builder will elide the MOV if it's a no-op, which may
* allow more work to be done in a single pass through algebraic.
*/
nir_def *ssa_val =
nir_mov_alu(build, val, instr->def.num_components);
if (ssa_val->index == util_dynarray_num_elements(states, uint16_t)) {
nir_def *mov = nir_def_rewrite_uses_with_alu_src(build, &instr->def, val,
instr->def.num_components);
if (mov) {
util_dynarray_append_typed(states, uint16_t, 0);
nir_algebraic_automaton(nir_def_instr(ssa_val), states, table->pass_op_table);
nir_algebraic_automaton(nir_def_instr(mov), states, table->pass_op_table);
}
/* Rewrite the uses of the old SSA value to the new one, and recurse
* through the uses updating the automaton's state.
*/
nir_def_rewrite_uses(&instr->def, ssa_val);
nir_algebraic_update_automaton(nir_def_instr(ssa_val), algebraic_worklist,
/* Recurse through the uses updating the automaton's state. */
nir_algebraic_update_automaton(nir_def_instr(val.src.ssa), algebraic_worklist,
states, table->pass_op_table);
/* Nothing uses the instr any more, so drop it out of the program. Note
@ -789,7 +787,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
nir_instr_remove(&instr->instr);
exec_list_push_tail(dead_instrs, &instr->instr.node);
return ssa_val;
return true;
}
static bool

View file

@ -1014,15 +1014,9 @@ TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
ASSERT_EQ(load->def.num_components, 3);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
nir_def *val = loads[0x2]->src.ssa;
ASSERT_EQ(val->bit_size, 64);
ASSERT_EQ(val->num_components, 1);
ASSERT_TRUE(test_alu(val, nir_op_mov));
nir_alu_instr *mov = nir_def_as_alu(val);
EXPECT_INSTR_SWIZZLES(mov, load, "y");
val = loads[0x1]->src.ssa;
nir_def *val = loads[0x1]->src.ssa;
ASSERT_EQ(val->bit_size, 32);
ASSERT_EQ(val->num_components, 2);
ASSERT_TRUE(test_alu(val, nir_op_unpack_64_2x32));