From c601308615f684c8ebe1a9b7845adc39525bde70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Aug 2025 12:22:37 -0400 Subject: [PATCH] nir: convert nir_instr_worklist to init/fini semantics w/out allocation This removes the malloc overhead. Reviewed-by: Gert Wollny Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/compiler/nir/nir.c | 11 ++++---- src/compiler/nir/nir_opt_barriers.c | 12 ++++----- src/compiler/nir/nir_opt_load_skip_helpers.c | 28 ++++++++++---------- src/compiler/nir/nir_opt_mqsad.c | 13 ++++----- src/compiler/nir/nir_opt_phi_to_bool.c | 11 ++++---- src/compiler/nir/nir_search.c | 22 ++++++++------- src/compiler/nir/nir_worklist.h | 21 +++++---------- src/gallium/drivers/radeonsi/si_nir_optim.c | 11 ++++---- src/intel/compiler/brw_nir_opt_fsat.c | 22 +++++++-------- src/microsoft/compiler/dxil_nir.c | 13 ++++----- 10 files changed, 81 insertions(+), 83 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 72da9f3ef18..009b221748a 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1425,17 +1425,18 @@ nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr) nir_cursor nir_instr_free_and_dce(nir_instr *instr) { - nir_instr_worklist *worklist = nir_instr_worklist_create(); + nir_instr_worklist worklist; + nir_instr_worklist_init(&worklist); - nir_instr_dce_add_dead_ssa_srcs(worklist, instr); + nir_instr_dce_add_dead_ssa_srcs(&worklist, instr); nir_cursor c = nir_instr_remove(instr); struct exec_list to_free; exec_list_make_empty(&to_free); nir_instr *dce_instr; - while ((dce_instr = nir_instr_worklist_pop_head(worklist))) { - nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr); + while ((dce_instr = nir_instr_worklist_pop_head(&worklist))) { + nir_instr_dce_add_dead_ssa_srcs(&worklist, dce_instr); /* If we're removing the instr where our cursor is, then we have to * point the cursor elsewhere. @@ -1451,7 +1452,7 @@ nir_instr_free_and_dce(nir_instr *instr) nir_instr_free_list(&to_free); - nir_instr_worklist_destroy(worklist); + nir_instr_worklist_fini(&worklist); return c; } diff --git a/src/compiler/nir/nir_opt_barriers.c b/src/compiler/nir/nir_opt_barriers.c index 158355f18db..812c731809d 100644 --- a/src/compiler/nir/nir_opt_barriers.c +++ b/src/compiler/nir/nir_opt_barriers.c @@ -302,13 +302,13 @@ nir_opt_barrier_modes_impl(nir_function_impl *impl) { bool progress = false; - nir_instr_worklist *barriers = nir_instr_worklist_create(); - if (!barriers) + nir_instr_worklist barriers; + if (!nir_instr_worklist_init(&barriers)) return false; struct u_vector mem_derefs; if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) { - nir_instr_worklist_destroy(barriers); + nir_instr_worklist_fini(&barriers); return false; } @@ -323,7 +323,7 @@ nir_opt_barrier_modes_impl(nir_function_impl *impl) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); if (intrin->intrinsic == nir_intrinsic_barrier) - nir_instr_worklist_push_tail(barriers, instr); + nir_instr_worklist_push_tail(&barriers, instr); } else if (instr->type == nir_instr_type_deref) { nir_deref_instr *deref = nir_instr_as_deref(instr); @@ -337,7 +337,7 @@ nir_opt_barrier_modes_impl(nir_function_impl *impl) } } - nir_foreach_instr_in_worklist(instr, barriers) { + nir_foreach_instr_in_worklist(instr, &barriers) { nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr); const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier); @@ -383,7 +383,7 @@ nir_opt_barrier_modes_impl(nir_function_impl *impl) } } - nir_instr_worklist_destroy(barriers); + nir_instr_worklist_fini(&barriers); u_vector_finish(&mem_derefs); return progress; diff --git a/src/compiler/nir/nir_opt_load_skip_helpers.c b/src/compiler/nir/nir_opt_load_skip_helpers.c index 689e7dd5aec..f5ae100bed8 100644 --- a/src/compiler/nir/nir_opt_load_skip_helpers.c +++ b/src/compiler/nir/nir_opt_load_skip_helpers.c @@ -47,8 +47,8 @@ instr_never_needs_helpers(nir_instr *instr) struct helper_state { BITSET_WORD *needs_helpers; - nir_instr_worklist *worklist; - nir_instr_worklist *load_instrs; + nir_instr_worklist worklist; + nir_instr_worklist load_instrs; nir_opt_load_skip_helpers_options *options; }; @@ -66,7 +66,7 @@ set_src_needs_helpers(nir_src *src, void *_data) if (!BITSET_TEST(hs->needs_helpers, src->ssa->index) && !instr_never_needs_helpers(src->ssa->parent_instr)) { BITSET_SET(hs->needs_helpers, src->ssa->index); - nir_instr_worklist_push_tail(hs->worklist, src->ssa->parent_instr); + nir_instr_worklist_push_tail(&hs->worklist, src->ssa->parent_instr); } return true; } @@ -83,10 +83,10 @@ nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_options struct helper_state hs = { .needs_helpers = rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc)), - .worklist = nir_instr_worklist_create(), - .load_instrs = nir_instr_worklist_create(), .options = options, }; + nir_instr_worklist_init(&hs.worklist); + nir_instr_worklist_init(&hs.load_instrs); /* First, add subgroup ops and anything that might cause side effects */ nir_foreach_block(block, impl) { @@ -106,7 +106,7 @@ nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_options /* Stash texture instructions so we don't have to walk the whole * shader again just to set the skip_helpers bit. */ - nir_instr_worklist_push_tail(hs.load_instrs, instr); + nir_instr_worklist_push_tail(&hs.load_instrs, instr); for (uint32_t i = 0; i < tex->num_srcs; i++) { switch (tex->src[i].src_type) { @@ -158,7 +158,7 @@ nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_options continue; } else if (hs.options->intrinsic_cb && hs.options->intrinsic_cb(intr, hs.options->intrinsic_cb_data)) { - nir_instr_worklist_push_tail(hs.load_instrs, instr); + nir_instr_worklist_push_tail(&hs.load_instrs, instr); } else { /* All I/O addresses need helpers because getting them wrong * may cause a fault. @@ -182,15 +182,15 @@ nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_options bool progress = false; /* We only need to run the worklist if we have loads */ - if (!nir_instr_worklist_is_empty(hs.load_instrs)) { - while (!nir_instr_worklist_is_empty(hs.worklist)) { - nir_instr *instr = nir_instr_worklist_pop_head(hs.worklist); + if (!nir_instr_worklist_is_empty(&hs.load_instrs)) { + while (!nir_instr_worklist_is_empty(&hs.worklist)) { + nir_instr *instr = nir_instr_worklist_pop_head(&hs.worklist); assert(nir_foreach_def(instr, def_needs_helpers, &hs)); nir_foreach_src(instr, set_src_needs_helpers, &hs); } - while (!nir_instr_worklist_is_empty(hs.load_instrs)) { - nir_instr *instr = nir_instr_worklist_pop_head(hs.load_instrs); + while (!nir_instr_worklist_is_empty(&hs.load_instrs)) { + nir_instr *instr = nir_instr_worklist_pop_head(&hs.load_instrs); nir_def *def = nir_instr_def(instr); /* If a load is uniform, we don't want to set skip_helpers because @@ -227,8 +227,8 @@ nir_opt_load_skip_helpers(nir_shader *shader, nir_opt_load_skip_helpers_options } } - nir_instr_worklist_destroy(hs.load_instrs); - nir_instr_worklist_destroy(hs.worklist); + nir_instr_worklist_fini(&hs.load_instrs); + nir_instr_worklist_fini(&hs.worklist); ralloc_free(hs.needs_helpers); return nir_progress(progress, impl, nir_metadata_all); diff --git a/src/compiler/nir/nir_opt_mqsad.c b/src/compiler/nir/nir_opt_mqsad.c index 803b1ca8de0..f1f24d5358f 100644 --- a/src/compiler/nir/nir_opt_mqsad.c +++ b/src/compiler/nir/nir_opt_mqsad.c @@ -31,22 +31,23 @@ is_mqsad_compatible(struct mqsad *mqsad, nir_scalar ref, nir_scalar src0, nir_sc return false; /* Ensure that this MSAD doesn't depend on any previous MSAD. */ - nir_instr_worklist *wl = nir_instr_worklist_create(); - nir_instr_worklist_add_ssa_srcs(wl, &msad->instr); - nir_foreach_instr_in_worklist(instr, wl) { + nir_instr_worklist wl; + nir_instr_worklist_init(&wl); + nir_instr_worklist_add_ssa_srcs(&wl, &msad->instr); + nir_foreach_instr_in_worklist(instr, &wl) { if (instr->block != msad->instr.block || instr->index < mqsad->first_msad_index) continue; u_foreach_bit(i, mqsad->mask) { if (instr == &mqsad->msad[i]->instr) { - nir_instr_worklist_destroy(wl); + nir_instr_worklist_fini(&wl); return false; } } - nir_instr_worklist_add_ssa_srcs(wl, instr); + nir_instr_worklist_add_ssa_srcs(&wl, instr); } - nir_instr_worklist_destroy(wl); + nir_instr_worklist_fini(&wl); return true; } diff --git a/src/compiler/nir/nir_opt_phi_to_bool.c b/src/compiler/nir/nir_opt_phi_to_bool.c index 31ae3d2f967..571cce1d889 100644 --- a/src/compiler/nir/nir_opt_phi_to_bool.c +++ b/src/compiler/nir/nir_opt_phi_to_bool.c @@ -200,7 +200,8 @@ phi_to_bool(nir_builder *b, nir_phi_instr *phi, void *unused) bool nir_opt_phi_to_bool(nir_shader *shader) { - nir_instr_worklist *worklist = nir_instr_worklist_create(); + nir_instr_worklist worklist; + nir_instr_worklist_init(&worklist); nir_foreach_function_impl(impl, shader) { nir_foreach_block(block, impl) { @@ -213,7 +214,7 @@ nir_opt_phi_to_bool(nir_shader *shader) * so we need to revisit it later if nessecary. */ if (instr->pass_flags) - nir_instr_worklist_push_tail(worklist, instr); + nir_instr_worklist_push_tail(&worklist, instr); } else { instr->pass_flags = get_bool_types(instr); } @@ -221,16 +222,16 @@ nir_opt_phi_to_bool(nir_shader *shader) } } - nir_foreach_instr_in_worklist(instr, worklist) { + nir_foreach_instr_in_worklist(instr, &worklist) { uint8_t bool_types = get_bool_types(instr); if (instr->pass_flags != bool_types) { instr->pass_flags = bool_types; nir_foreach_use(use, nir_instr_def(instr)) - nir_instr_worklist_push_tail(worklist, nir_src_parent_instr(use)); + nir_instr_worklist_push_tail(&worklist, nir_src_parent_instr(use)); } } - nir_instr_worklist_destroy(worklist); + nir_instr_worklist_fini(&worklist); return nir_shader_phi_pass(shader, phi_to_bool, nir_metadata_control_flow, NULL); } diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index f2bdfc73e68..b0504a8e9cf 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -659,20 +659,21 @@ nir_algebraic_update_automaton(nir_instr *new_instr, const struct per_op_table *pass_op_table) { - nir_instr_worklist *automaton_worklist = nir_instr_worklist_create(); + nir_instr_worklist automaton_worklist; + nir_instr_worklist_init(&automaton_worklist); /* Walk through the tree of uses of our new instruction's SSA value, * recursively updating the automaton state until it stabilizes. */ - add_uses_to_worklist(new_instr, automaton_worklist, states, pass_op_table); + add_uses_to_worklist(new_instr, &automaton_worklist, states, pass_op_table); nir_instr *instr; - while ((instr = nir_instr_worklist_pop_head(automaton_worklist))) { + while ((instr = nir_instr_worklist_pop_head(&automaton_worklist))) { nir_instr_worklist_push_tail(algebraic_worklist, instr); - add_uses_to_worklist(instr, automaton_worklist, states, pass_op_table); + add_uses_to_worklist(instr, &automaton_worklist, states, pass_op_table); } - nir_instr_worklist_destroy(automaton_worklist); + nir_instr_worklist_fini(&automaton_worklist); } static nir_def * @@ -913,7 +914,8 @@ nir_algebraic_impl(nir_function_impl *impl, struct hash_table range_ht; _mesa_pointer_hash_table_init(&range_ht, NULL); - nir_instr_worklist *worklist = nir_instr_worklist_create(); + nir_instr_worklist worklist; + nir_instr_worklist_init(&worklist); /* Walk top-to-bottom setting up the automaton state. */ nir_foreach_block(block, impl) { @@ -930,7 +932,7 @@ nir_algebraic_impl(nir_function_impl *impl, nir_foreach_instr_reverse(instr, block) { instr->pass_flags = 0; if (instr->type == nir_instr_type_alu) - nir_instr_worklist_push_tail(worklist, instr); + nir_instr_worklist_push_tail(&worklist, instr); } } @@ -938,7 +940,7 @@ nir_algebraic_impl(nir_function_impl *impl, exec_list_make_empty(&dead_instrs); nir_instr *instr; - while ((instr = nir_instr_worklist_pop_head(worklist))) { + while ((instr = nir_instr_worklist_pop_head(&worklist))) { /* The worklist can have an instr pushed to it multiple times if it was * the src of multiple instrs that also got optimized, so make sure that * we don't try to re-optimize an instr we already handled. @@ -948,12 +950,12 @@ nir_algebraic_impl(nir_function_impl *impl, progress |= nir_algebraic_instr(&build, instr, &range_ht, condition_flags, - table, &states, worklist, &dead_instrs); + table, &states, &worklist, &dead_instrs); } nir_instr_free_list(&dead_instrs); - nir_instr_worklist_destroy(worklist); + nir_instr_worklist_fini(&worklist); _mesa_hash_table_fini(&range_ht, NULL); util_dynarray_fini(&states); diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h index 7b1dce023fd..982f8b47ba0 100644 --- a/src/compiler/nir/nir_worklist.h +++ b/src/compiler/nir/nir_worklist.h @@ -76,21 +76,15 @@ void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl); typedef struct { struct u_vector instr_vec; + bool initialized; } nir_instr_worklist; -static inline nir_instr_worklist * -nir_instr_worklist_create() +static inline bool +nir_instr_worklist_init(nir_instr_worklist *wl) { - nir_instr_worklist *wl = malloc(sizeof(nir_instr_worklist)); - if (!wl) - return NULL; - - if (!u_vector_init_pow2(&wl->instr_vec, 8, sizeof(struct nir_instr *))) { - free(wl); - return NULL; - } - - return wl; + wl->initialized = + u_vector_init_pow2(&wl->instr_vec, 8, sizeof(struct nir_instr *)) != 0; + return wl->initialized; } static inline uint32_t @@ -106,10 +100,9 @@ nir_instr_worklist_is_empty(nir_instr_worklist *wl) } static inline void -nir_instr_worklist_destroy(nir_instr_worklist *wl) +nir_instr_worklist_fini(nir_instr_worklist *wl) { u_vector_finish(&wl->instr_vec); - free(wl); } static inline void diff --git a/src/gallium/drivers/radeonsi/si_nir_optim.c b/src/gallium/drivers/radeonsi/si_nir_optim.c index a213911839e..e29bc55010b 100644 --- a/src/gallium/drivers/radeonsi/si_nir_optim.c +++ b/src/gallium/drivers/radeonsi/si_nir_optim.c @@ -35,12 +35,13 @@ check_instr_depends_on_tex(nir_intrinsic_instr *store) int texunit = -1; struct set *instrs = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - nir_instr_worklist *work = nir_instr_worklist_create(); + nir_instr_worklist work; + nir_instr_worklist_init(&work); _mesa_set_add(instrs, &store->instr); - add_src_instr_to_worklist(&store->src[0], work); + add_src_instr_to_worklist(&store->src[0], &work); - nir_foreach_instr_in_worklist(instr, work) { + nir_foreach_instr_in_worklist(instr, &work) { /* Don't process an instruction twice */ if (_mesa_set_search(instrs, instr)) continue; @@ -50,7 +51,7 @@ check_instr_depends_on_tex(nir_intrinsic_instr *store) if (instr->type == nir_instr_type_alu || instr->type == nir_instr_type_load_const) { /* TODO: ubo, etc */ - if (!nir_foreach_src(instr, add_src_instr_to_worklist, work)) + if (!nir_foreach_src(instr, add_src_instr_to_worklist, &work)) break; continue; } else if (instr->type == nir_instr_type_tex) { @@ -67,7 +68,7 @@ check_instr_depends_on_tex(nir_intrinsic_instr *store) } } - nir_instr_worklist_destroy(work); + nir_instr_worklist_fini(&work); _mesa_set_destroy(instrs, NULL); return texunit; } diff --git a/src/intel/compiler/brw_nir_opt_fsat.c b/src/intel/compiler/brw_nir_opt_fsat.c index 26419e9d206..d385b97da11 100644 --- a/src/intel/compiler/brw_nir_opt_fsat.c +++ b/src/intel/compiler/brw_nir_opt_fsat.c @@ -63,17 +63,15 @@ #include "brw_nir.h" #include "nir_worklist.h" -static nir_instr_worklist * -nir_instr_worklist_create_or_clear(nir_instr_worklist * wl) +static void +nir_instr_worklist_init_or_clear(nir_instr_worklist * wl) { - if (wl == NULL) { - return nir_instr_worklist_create(); + if (!wl->initialized) { + nir_instr_worklist_init(wl); } else { /* Clear any old cruft in the worklist. */ nir_foreach_instr_in_worklist(_, wl) ; - - return wl; } } @@ -199,7 +197,7 @@ brw_nir_opt_fsat(nir_shader *shader) { bool progress = false; void *mem_ctx = ralloc_context(NULL); - nir_instr_worklist *sources = NULL; + nir_instr_worklist sources = {0}; struct set *fixup = NULL; struct set *verified_phis = NULL; @@ -215,10 +213,10 @@ brw_nir_opt_fsat(nir_shader *shader) if (alu->op != nir_op_fsat) continue; - sources = nir_instr_worklist_create_or_clear(sources); + nir_instr_worklist_init_or_clear(&sources); fixup = _mesa_pointer_set_create_or_clear(mem_ctx, fixup, NULL); - collect_reaching_defs(alu, sources); + collect_reaching_defs(alu, &sources); /* verified_phis is a cache of phi nodes where all users of the * phi node are (eventually) fsat. Once a phi node is verified, it @@ -228,7 +226,7 @@ brw_nir_opt_fsat(nir_shader *shader) if (verified_phis == NULL) verified_phis = _mesa_pointer_set_create(mem_ctx); - if (verify_users(sources, verified_phis, fixup)) { + if (verify_users(&sources, verified_phis, fixup)) { fixup_defs(fixup); /* All defs that can reach the old fsat instruction must @@ -246,8 +244,8 @@ brw_nir_opt_fsat(nir_shader *shader) nir_metadata_control_flow); } - if (sources != NULL) - nir_instr_worklist_destroy(sources); + if (sources.initialized) + nir_instr_worklist_fini(&sources); ralloc_free(mem_ctx); diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c index 1e31fcd91a5..482197238a0 100644 --- a/src/microsoft/compiler/dxil_nir.c +++ b/src/microsoft/compiler/dxil_nir.c @@ -2397,15 +2397,16 @@ propagate_input_to_output_dependencies(struct dxil_module *mod, nir_intrinsic_in if (!set_input_bits(mod, load_intr, input_bits, &tables, &table_sizes)) return false; - nir_instr_worklist *worklist = nir_instr_worklist_create(); - nir_instr_worklist_push_tail(worklist, &load_intr->instr); + nir_instr_worklist worklist; + nir_instr_worklist_init(&worklist); + nir_instr_worklist_push_tail(&worklist, &load_intr->instr); bool any_bits_set = false; - nir_foreach_instr_in_worklist(instr, worklist) { + nir_foreach_instr_in_worklist(instr, &worklist) { if (instr->pass_flags) continue; instr->pass_flags = 1; - nir_foreach_def(instr, add_def_to_worklist, worklist); + nir_foreach_def(instr, add_def_to_worklist, &worklist); switch (instr->type) { case nir_instr_type_jump: { nir_jump_instr *jump = nir_instr_as_jump(instr); @@ -2417,7 +2418,7 @@ propagate_input_to_output_dependencies(struct dxil_module *mod, nir_intrinsic_in parent = parent->parent; nir_foreach_block_in_cf_node(block, parent) nir_foreach_instr(i, block) - nir_instr_worklist_push_tail(worklist, i); + nir_instr_worklist_push_tail(&worklist, i); } break; default: @@ -2443,7 +2444,7 @@ propagate_input_to_output_dependencies(struct dxil_module *mod, nir_intrinsic_in } } - nir_instr_worklist_destroy(worklist); + nir_instr_worklist_fini(&worklist); return any_bits_set; }