diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 3c2b5be4f21..73b24b9fe5c 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -114,6 +114,7 @@ else
   'nir_deref.h',
   'nir_divergence_analysis.c',
   'nir_dominance.c',
+  'nir_dominance_lca.c',
   'nir_fixup_is_exported.c',
   'nir_format_convert.c',
   'nir_format_convert.h',
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 009b221748a..6920eccce91 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -692,6 +692,8 @@ nir_function_impl_create_bare(nir_shader *shader)
    impl->num_blocks = 0;
    impl->valid_metadata = nir_metadata_none;
    impl->structured = true;
+   range_minimum_query_table_init(&impl->dom_lca_info.table);
+   impl->dom_lca_info.block_from_idx = NULL;
 
    /* create start & end blocks */
    nir_block *start_block = nir_block_create(shader);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 61d258097c5..4b7ca4e2460 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -41,6 +41,7 @@
 #include "util/log.h"
 #include "util/macros.h"
 #include "util/ralloc.h"
+#include "util/range_minimum_query.h"
 #include "util/set.h"
 #include "util/u_math.h"
 #include "nir_defines.h"
@@ -3476,6 +3477,17 @@ typedef enum {
     */
    nir_metadata_divergence = 0x40,
 
+   /** Indicates that block dominance lca information is valid
+    *
+    * This includes:
+    *
+    *   - nir_function_impl::dom_lca_info
+    *
+    * A pass can preserve this metadata type if it preserves
+    * nir_metadata_dominance.
+    */
+   nir_metadata_dominance_lca = 0x80,
+
    /** All control flow metadata
     *
     * This includes all metadata preserved by a pass that preserves control flow
@@ -3486,7 +3498,8 @@ typedef enum {
     * This is the most common metadata set to preserve, so it has its own alias.
     */
    nir_metadata_control_flow = nir_metadata_block_index |
-                               nir_metadata_dominance,
+                               nir_metadata_dominance |
+                               nir_metadata_dominance_lca,
 
    /** All metadata
     *
@@ -3524,6 +3537,12 @@ typedef struct nir_function_impl {
    /* total number of basic blocks, only valid when block_index_dirty = false */
    unsigned num_blocks;
 
+   /** Information used for LCA queries */
+   struct nir_dom_lca_info {
+      struct range_minimum_query_table table;
+      nir_block **block_from_idx;
+   } dom_lca_info;
+
    /** True if this nir_function_impl uses structured control-flow
     *
     * Structured nir_function_impls have different validation rules.
@@ -4921,8 +4940,18 @@ bool nir_shader_lower_instructions(nir_shader *shader,
 
 void nir_calc_dominance_impl(nir_function_impl *impl);
 void nir_calc_dominance(nir_shader *shader);
+void nir_calc_dominance_lca_impl(nir_function_impl *impl);
 
+/**
+ * Computes the lowest common ancestor of two blocks in the dominator tree.
+ *
+ * If one of the blocks is null or unreachable, the other block is returned or
+ * NULL if it's unreachable.
+ *
+ * Requires nir_metadata_dominance_lca
+ */
 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+
 bool nir_block_dominates(nir_block *parent, nir_block *child);
 bool nir_block_is_unreachable(nir_block *block);
 
diff --git a/src/compiler/nir/nir_dominance.c b/src/compiler/nir/nir_dominance.c
index 36f54c651ad..fef893b3f84 100644
--- a/src/compiler/nir/nir_dominance.c
+++ b/src/compiler/nir/nir_dominance.c
@@ -212,35 +212,6 @@ nir_calc_dominance(nir_shader *shader)
    }
 }
 
-static nir_block *
-block_return_if_reachable(nir_block *b)
-{
-   return (b && nir_block_is_reachable(b)) ? b : NULL;
-}
-
-/**
- * Computes the least common ancestor of two blocks. If one of the blocks
- * is null or unreachable, the other block is returned or NULL if it's
- * unreachable.
- */
-nir_block *
-nir_dominance_lca(nir_block *b1, nir_block *b2)
-{
-   if (b1 == NULL || !nir_block_is_reachable(b1))
-      return block_return_if_reachable(b2);
-
-   if (b2 == NULL || !nir_block_is_reachable(b2))
-      return block_return_if_reachable(b1);
-
-   assert(nir_cf_node_get_function(&b1->cf_node) ==
-          nir_cf_node_get_function(&b2->cf_node));
-
-   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
-          nir_metadata_dominance);
-
-   return intersect(b1, b2);
-}
-
 /**
  * Returns true if parent dominates child according to the following
  * definition:
diff --git a/src/compiler/nir/nir_dominance_lca.c b/src/compiler/nir/nir_dominance_lca.c
new file mode 100644
index 00000000000..84a7f1d6675
--- /dev/null
+++ b/src/compiler/nir/nir_dominance_lca.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2025 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "nir.h"
+
+/**
+ * Find the lowest common ancestor in the dominance tree.
+ *
+ * We reduce the LCA problem to range minimum query using the standard euler
+ * tour method (see eg. Bender and Colton section 2). From there, we use the
+ * simple RMQ algorithm that uses O(n log n) preprocessing time and O(1) query
+ * time (Bender and Colton section 3).
+ *
+ * As a slight modification, we store the block index instead of the block
+ * depth. We can do this because the lower tree depth is always at a lower block
+ * index and we use an RMQ algorithm that doesn't rely on the -1/+1 property.
+ *
+ * Bender, M.A., Farach-Colton, M. (2000). The LCA Problem Revisited. In:
+ * Gonnet, G.H., Viola, A. (eds) LATIN 2000: Theoretical Informatics. LATIN
+ * 2000. Lecture Notes in Computer Science, vol 1776. Springer, Berlin,
+ * Heidelberg. https://doi.org/10.1007/10719839_9
+ */
+
+/* Size the RMQ table for a full Euler tour (2 * num_blocks - 1 entries) and
+ * the block-index -> block lookup array for num_blocks entries.
+ */
+static void
+realloc_info(nir_function_impl *impl)
+{
+   struct nir_dom_lca_info *info = &impl->dom_lca_info;
+   const uint32_t euler_tour_size = impl->num_blocks * 2 - 1;
+
+   void *mem_ctx = ralloc_parent(impl);
+   range_minimum_query_table_resize(&info->table, mem_ctx, euler_tour_size);
+   info->block_from_idx = reralloc_array_size(mem_ctx, info->block_from_idx,
+                                              sizeof(nir_block *),
+                                              impl->num_blocks);
+}
+
+static uint32_t
+dom_lca_representative(nir_block *block)
+{
+   /* The dom_pre_index is 1-indexed so we need to subtract one to match our
+    * indices
+    */
+   return block->dom_pre_index - 1;
+}
+
+/* Walk the dominator tree depth-first from the start block, storing the block
+ * index of every visited node in the RMQ table and filling block_from_idx.
+ */
+static void
+generate_euler_tour(nir_function_impl *impl)
+{
+   uint32_t *table = impl->dom_lca_info.table.table;
+   nir_block **block_from_idx = impl->dom_lca_info.block_from_idx;
+   if (impl->num_blocks == 1) {
+      nir_block *block = nir_start_block(impl);
+      table[0] = 0;
+      block_from_idx[0] = block;
+      return;
+   }
+
+   /* By definition, the first row of the table contains range minimum query
+    * lookups for each single-element block, meaning it is just the array that
+    * we will perform RMQs on. Therefore, when generating the Euler tour, we
+    * store results in the first row and are free to use the rest of the table
+    * as scratch memory for the depth-first search.
+    *
+    * The stack contains the index of the node's next child to visit.
+    */
+   assert(impl->dom_lca_info.table.height >= 2);
+   STATIC_ASSERT(sizeof(uint32_t) <= sizeof(nir_block *));
+   uint32_t *dfs_stack = (uint32_t *)&table[impl->dom_lca_info.table.width];
+
+   nir_block *cur_block = nir_start_block(impl);
+   uint32_t *cur_stack = dfs_stack;
+
+   bool first_visit = true;
+   uint32_t i;
+   for (i = 0; i < impl->dom_lca_info.table.width; i++) {
+      if (cur_block == NULL) {
+         /* This can happen earlier than expected if some blocks are
+          * unreachable
+          */
+         break;
+      }
+
+      assert(cur_stack >= dfs_stack);
+      table[i] = cur_block->index;
+
+      if (first_visit) {
+         /* First visit. Place it on the stack. */
+         *cur_stack = 0;
+         assert(i == dom_lca_representative(cur_block));
+         block_from_idx[cur_block->index] = cur_block;
+      }
+
+      if (*cur_stack < cur_block->num_dom_children) {
+         cur_block = cur_block->dom_children[*cur_stack];
+         *cur_stack += 1;
+         cur_stack += 1;
+         first_visit = true;
+      } else {
+         assert(*cur_stack == cur_block->num_dom_children);
+         cur_block = cur_block->imm_dom;
+         cur_stack -= 1;
+         first_visit = false;
+      }
+   }
+
+   assert(cur_block == NULL);
+
+   if (i != impl->dom_lca_info.table.width) {
+      void *mem_ctx = ralloc_parent(impl);
+      range_minimum_query_table_resize(&impl->dom_lca_info.table, mem_ctx, i);
+   }
+}
+
+/* Build the LCA lookup structure for impl. Returns immediately if
+ * nir_metadata_dominance_lca is already marked valid.
+ */
+void
+nir_calc_dominance_lca_impl(nir_function_impl *impl)
+{
+   if (impl->valid_metadata & nir_metadata_dominance_lca)
+      return;
+
+   nir_metadata_require(impl, nir_metadata_block_index |
+                                 nir_metadata_dominance);
+
+   realloc_info(impl);
+   generate_euler_tour(impl);
+   range_minimum_query_table_preprocess(&impl->dom_lca_info.table);
+}
+
+static nir_block *
+block_return_if_reachable(nir_block *b)
+{
+   return (b && nir_block_is_reachable(b)) ? b : NULL;
+}
+
+/* Assert-only check: result dominates both blocks and none of its
+ * dominator-tree children dominates both of them.
+ */
+ASSERTED static bool
+is_lca(nir_block *result, nir_block *b1, nir_block *b2)
+{
+   if (!nir_block_dominates(result, b1) || !nir_block_dominates(result, b2))
+      return false;
+
+   for (unsigned i = 0; i < result->num_dom_children; i++) {
+      nir_block *child = result->dom_children[i];
+      if (nir_block_dominates(child, b1) &&
+          nir_block_dominates(child, b2))
+         return false;
+   }
+
+   return true;
+}
+
+nir_block *
+nir_dominance_lca(nir_block *b1, nir_block *b2)
+{
+   if (b1 == NULL || !nir_block_is_reachable(b1))
+      return block_return_if_reachable(b2);
+
+   if (b2 == NULL || !nir_block_is_reachable(b2))
+      return block_return_if_reachable(b1);
+
+   assert(nir_cf_node_get_function(&b1->cf_node) ==
+          nir_cf_node_get_function(&b2->cf_node));
+
+   nir_function_impl *impl = nir_cf_node_get_function(&b1->cf_node);
+   assert(impl->valid_metadata & nir_metadata_dominance_lca);
+
+   uint32_t i1 = dom_lca_representative(b1);
+   uint32_t i2 = dom_lca_representative(b2);
+   if (i1 > i2)
+      SWAP(i1, i2);
+   uint32_t index = range_minimum_query(&impl->dom_lca_info.table, i1, i2 + 1);
+   nir_block *result = impl->dom_lca_info.block_from_idx[index];
+
+   assert(is_lca(result, b1, b2));
+
+   return result;
+}
diff --git a/src/compiler/nir/nir_metadata.c b/src/compiler/nir/nir_metadata.c
index fc90b9b5c33..1f8a18eee72 100644
--- a/src/compiler/nir/nir_metadata.c
+++ b/src/compiler/nir/nir_metadata.c
@@ -38,6 +38,8 @@ nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...)
       nir_index_instrs(impl);
    if (NEEDS_UPDATE(nir_metadata_dominance))
       nir_calc_dominance_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_dominance_lca))
+      nir_calc_dominance_lca_impl(impl);
    if (NEEDS_UPDATE(nir_metadata_live_defs))
       nir_live_defs_impl(impl);
    if (NEEDS_UPDATE(nir_metadata_divergence))
@@ -73,6 +75,9 @@ nir_progress(bool progress, nir_function_impl *impl, nir_metadata preserved)
    if (!progress)
       preserved = nir_metadata_all;
 
+   assert((preserved & nir_metadata_dominance) ||
+          !(preserved & nir_metadata_dominance_lca));
+
    /* If we discard valid liveness information, immediately free the
     * liveness information for each block. For large shaders, it can
     * consume a huge amount of memory, and it's usually not immediately
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index c7cda3c6aa8..2909025ea61 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -796,8 +796,9 @@ weak_gvn(const nir_instr *a, const nir_instr *b)
 static bool
 opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
 {
-   nir_metadata_require(impl,
-                        nir_metadata_block_index | nir_metadata_dominance);
+   nir_metadata_require(impl, nir_metadata_block_index |
+                                 nir_metadata_dominance |
+                                 nir_metadata_dominance_lca);
    nir_metadata_require(impl, nir_metadata_loop_analysis,
                         shader->options->force_indirect_unrolling,
                         shader->options->force_indirect_unrolling_sampler);
diff --git a/src/compiler/nir/nir_opt_sink.c b/src/compiler/nir/nir_opt_sink.c
index 1cf8ea0af94..161b491f595 100644
--- a/src/compiler/nir/nir_opt_sink.c
+++ b/src/compiler/nir/nir_opt_sink.c
@@ -361,11 +361,12 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
    bool progress = false;
 
    nir_foreach_function_impl(impl, shader) {
-      nir_metadata_require(impl,
-                           nir_metadata_block_index | nir_metadata_dominance |
-                           (options & (nir_move_only_convergent |
-                                       nir_move_only_divergent) ?
-                            nir_metadata_divergence : 0));
+      nir_metadata required = nir_metadata_block_index |
+                              nir_metadata_dominance |
+                              nir_metadata_dominance_lca;
+      if (options & (nir_move_only_convergent | nir_move_only_divergent))
+         required |= nir_metadata_divergence;
+      nir_metadata_require(impl, required);
 
       nir_foreach_block_reverse(block, impl) {
          nir_foreach_instr_reverse_safe(instr, block) {
diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics_pre_trace.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics_pre_trace.c
index 84771ed8935..42eaf995015 100644
--- a/src/intel/compiler/brw_nir_lower_rt_intrinsics_pre_trace.c
+++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics_pre_trace.c
@@ -64,7 +64,8 @@ brw_nir_lower_rt_intrinsics_pre_trace(nir_shader *nir)
 
    if (intrinsics->entries > 0) {
       nir_foreach_function_with_impl(func, impl, nir) {
-         nir_metadata_require(impl, nir_metadata_dominance);
+         nir_metadata_require(impl, nir_metadata_dominance |
+                                       nir_metadata_dominance_lca);
 
          /* Going in reverse order of blocks, move the intrinsics gather above
           * in the LCA block to trace calls.