From 3ad142d4d7d12fafb72995e17e2aaf529a6eed9d Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 14 Mar 2026 08:18:23 +0100 Subject: [PATCH] nir/search: never insert movs for alu uses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This means we respect the pattern order better because simple replacements like bcsel(False, a, b) -> b no longer insert movs that can block more specialized patterns. Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_builder.c | 30 +++++++++++++++++++ src/compiler/nir/nir_builder.h | 6 ++++ src/compiler/nir/nir_search.c | 22 +++++++------- .../nir/tests/load_store_vectorizer_tests.cpp | 10 ++----- 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/compiler/nir/nir_builder.c b/src/compiler/nir/nir_builder.c index e20c8863912..0a324f0053d 100644 --- a/src/compiler/nir/nir_builder.c +++ b/src/compiler/nir/nir_builder.c @@ -408,6 +408,36 @@ nir_vec_scalars(nir_builder *build, nir_scalar *comp, unsigned num_components) return &instr->def; } +nir_def * +nir_def_rewrite_uses_with_alu_src(nir_builder *build, nir_def *def, + nir_alu_src src, unsigned num_components) +{ + if (nir_alu_src_is_trivial_ssa(&src, num_components)) { + nir_def_rewrite_uses(def, src.src.ssa); + return NULL; + } + + nir_def *mov = NULL; + + nir_foreach_use_including_if_safe(use, def) { + if (nir_src_is_if(use) || nir_src_parent_instr(use)->type != nir_instr_type_alu) { + if (!mov) + mov = nir_mov_alu(build, src, num_components); + + nir_src_rewrite(use, mov); + } else { + nir_alu_src *alu_src = container_of(use, nir_alu_src, src); + + for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) + alu_src->swizzle[i] = src.swizzle[alu_src->swizzle[i]]; + + nir_src_rewrite(use, src.src.ssa); + } + } + + return mov; +} + /** * Get nir_def for an alu src, respecting the nir_alu_src's swizzle. */ diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 6bbb8c47d41..37236e9bc69 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -728,6 +728,12 @@ nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) return &mov->def; } +/* Tries to avoid inserting a mov for the replacement, + * but if it has to insert one to handle non-alu, it's return instead of NULL. + */ +nir_def * +nir_def_rewrite_uses_with_alu_src(nir_builder *build, nir_def *def, nir_alu_src src, unsigned num_components); + /** * Construct a mov that reswizzles the source's components. */ diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 941858ec376..3e1e021045a 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -673,7 +673,7 @@ nir_algebraic_update_automaton(nir_instr *new_instr, nir_instr_worklist_fini(&automaton_worklist); } -static nir_def * +static bool nir_replace_instr(nir_builder *build, nir_alu_instr *instr, const nir_search_state *search_state, struct util_dynarray *states, @@ -710,7 +710,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, } } if (!found) - return NULL; + return false; #if 0 fprintf(stderr, "matched: "); @@ -766,18 +766,16 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, /* Note that NIR builder will elide the MOV if it's a no-op, which may * allow more work to be done in a single pass through algebraic. */ - nir_def *ssa_val = - nir_mov_alu(build, val, instr->def.num_components); - if (ssa_val->index == util_dynarray_num_elements(states, uint16_t)) { + nir_def *mov = nir_def_rewrite_uses_with_alu_src(build, &instr->def, val, + instr->def.num_components); + + if (mov) { util_dynarray_append_typed(states, uint16_t, 0); - nir_algebraic_automaton(nir_def_instr(ssa_val), states, table->pass_op_table); + nir_algebraic_automaton(nir_def_instr(mov), states, table->pass_op_table); } - /* Rewrite the uses of the old SSA value to the new one, and recurse - * through the uses updating the automaton's state. - */ - nir_def_rewrite_uses(&instr->def, ssa_val); - nir_algebraic_update_automaton(nir_def_instr(ssa_val), algebraic_worklist, + /* Recurse through the uses updating the automaton's state. */ + nir_algebraic_update_automaton(nir_def_instr(val.src.ssa), algebraic_worklist, states, table->pass_op_table); /* Nothing uses the instr any more, so drop it out of the program. Note @@ -789,7 +787,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, nir_instr_remove(&instr->instr); exec_list_push_tail(dead_instrs, &instr->instr.node); - return ssa_val; + return true; } static bool diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp index 5bd3d0f8c16..9a51452ff91 100644 --- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp +++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp @@ -1014,15 +1014,9 @@ TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64) ASSERT_EQ(load->def.num_components, 3); ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z"); + EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y"); - nir_def *val = loads[0x2]->src.ssa; - ASSERT_EQ(val->bit_size, 64); - ASSERT_EQ(val->num_components, 1); - ASSERT_TRUE(test_alu(val, nir_op_mov)); - nir_alu_instr *mov = nir_def_as_alu(val); - EXPECT_INSTR_SWIZZLES(mov, load, "y"); - - val = loads[0x1]->src.ssa; + nir_def *val = loads[0x1]->src.ssa; ASSERT_EQ(val->bit_size, 32); ASSERT_EQ(val->num_components, 2); ASSERT_TRUE(test_alu(val, nir_op_unpack_64_2x32));