mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 16:00:08 +01:00
nir: Add a faster lowest common ancestor algorithm
On a fossil from the blender 4.5.0 vulkan backend, this improves compile times in nak by about 17%. Compile time of other shaders improves by a more modest 1.2%. No stat changes on shader-db. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36184>
This commit is contained in:
parent
0d07b86073
commit
17876a00af
9 changed files with 226 additions and 38 deletions
|
|
@ -114,6 +114,7 @@ else
|
||||||
'nir_deref.h',
|
'nir_deref.h',
|
||||||
'nir_divergence_analysis.c',
|
'nir_divergence_analysis.c',
|
||||||
'nir_dominance.c',
|
'nir_dominance.c',
|
||||||
|
'nir_dominance_lca.c',
|
||||||
'nir_fixup_is_exported.c',
|
'nir_fixup_is_exported.c',
|
||||||
'nir_format_convert.c',
|
'nir_format_convert.c',
|
||||||
'nir_format_convert.h',
|
'nir_format_convert.h',
|
||||||
|
|
|
||||||
|
|
@ -692,6 +692,8 @@ nir_function_impl_create_bare(nir_shader *shader)
|
||||||
impl->num_blocks = 0;
|
impl->num_blocks = 0;
|
||||||
impl->valid_metadata = nir_metadata_none;
|
impl->valid_metadata = nir_metadata_none;
|
||||||
impl->structured = true;
|
impl->structured = true;
|
||||||
|
range_minimum_query_table_init(&impl->dom_lca_info.table);
|
||||||
|
impl->dom_lca_info.block_from_idx = NULL;
|
||||||
|
|
||||||
/* create start & end blocks */
|
/* create start & end blocks */
|
||||||
nir_block *start_block = nir_block_create(shader);
|
nir_block *start_block = nir_block_create(shader);
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@
|
||||||
#include "util/log.h"
|
#include "util/log.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
|
#include "util/range_minimum_query.h"
|
||||||
#include "util/set.h"
|
#include "util/set.h"
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
#include "nir_defines.h"
|
#include "nir_defines.h"
|
||||||
|
|
@ -3476,6 +3477,17 @@ typedef enum {
|
||||||
*/
|
*/
|
||||||
nir_metadata_divergence = 0x40,
|
nir_metadata_divergence = 0x40,
|
||||||
|
|
||||||
|
/** Indicates that block dominance lca information is valid
|
||||||
|
*
|
||||||
|
* This includes:
|
||||||
|
*
|
||||||
|
* - nir_function_impl::dom_lca_info
|
||||||
|
*
|
||||||
|
* A pass can preserve this metadata type if it preserves
|
||||||
|
* nir_metadata_dominance.
|
||||||
|
*/
|
||||||
|
nir_metadata_dominance_lca = 0x80,
|
||||||
|
|
||||||
/** All control flow metadata
|
/** All control flow metadata
|
||||||
*
|
*
|
||||||
* This includes all metadata preserved by a pass that preserves control flow
|
* This includes all metadata preserved by a pass that preserves control flow
|
||||||
|
|
@ -3486,7 +3498,8 @@ typedef enum {
|
||||||
* This is the most common metadata set to preserve, so it has its own alias.
|
* This is the most common metadata set to preserve, so it has its own alias.
|
||||||
*/
|
*/
|
||||||
nir_metadata_control_flow = nir_metadata_block_index |
|
nir_metadata_control_flow = nir_metadata_block_index |
|
||||||
nir_metadata_dominance,
|
nir_metadata_dominance |
|
||||||
|
nir_metadata_dominance_lca,
|
||||||
|
|
||||||
/** All metadata
|
/** All metadata
|
||||||
*
|
*
|
||||||
|
|
@ -3524,6 +3537,12 @@ typedef struct nir_function_impl {
|
||||||
/* total number of basic blocks, only valid when block_index_dirty = false */
|
/* total number of basic blocks, only valid when block_index_dirty = false */
|
||||||
unsigned num_blocks;
|
unsigned num_blocks;
|
||||||
|
|
||||||
|
/** Information used for LCA queries */
|
||||||
|
struct nir_dom_lca_info {
|
||||||
|
struct range_minimum_query_table table;
|
||||||
|
nir_block **block_from_idx;
|
||||||
|
} dom_lca_info;
|
||||||
|
|
||||||
/** True if this nir_function_impl uses structured control-flow
|
/** True if this nir_function_impl uses structured control-flow
|
||||||
*
|
*
|
||||||
* Structured nir_function_impls have different validation rules.
|
* Structured nir_function_impls have different validation rules.
|
||||||
|
|
@ -4921,8 +4940,18 @@ bool nir_shader_lower_instructions(nir_shader *shader,
|
||||||
|
|
||||||
void nir_calc_dominance_impl(nir_function_impl *impl);
|
void nir_calc_dominance_impl(nir_function_impl *impl);
|
||||||
void nir_calc_dominance(nir_shader *shader);
|
void nir_calc_dominance(nir_shader *shader);
|
||||||
|
void nir_calc_dominance_lca_impl(nir_function_impl *impl);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the lowest common ancestor of two blocks in the dominator tree.
|
||||||
|
*
|
||||||
|
* If one of the blocks is null or unreachable, the other block is returned or
|
||||||
|
* NULL if it's unreachable.
|
||||||
|
*
|
||||||
|
* Requires nir_metadata_dominance_lca
|
||||||
|
*/
|
||||||
nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
|
nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
|
||||||
|
|
||||||
bool nir_block_dominates(nir_block *parent, nir_block *child);
|
bool nir_block_dominates(nir_block *parent, nir_block *child);
|
||||||
bool nir_block_is_unreachable(nir_block *block);
|
bool nir_block_is_unreachable(nir_block *block);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -212,35 +212,6 @@ nir_calc_dominance(nir_shader *shader)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static nir_block *
|
|
||||||
block_return_if_reachable(nir_block *b)
|
|
||||||
{
|
|
||||||
return (b && nir_block_is_reachable(b)) ? b : NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the least common ancestor of two blocks. If one of the blocks
|
|
||||||
* is null or unreachable, the other block is returned or NULL if it's
|
|
||||||
* unreachable.
|
|
||||||
*/
|
|
||||||
nir_block *
|
|
||||||
nir_dominance_lca(nir_block *b1, nir_block *b2)
|
|
||||||
{
|
|
||||||
if (b1 == NULL || !nir_block_is_reachable(b1))
|
|
||||||
return block_return_if_reachable(b2);
|
|
||||||
|
|
||||||
if (b2 == NULL || !nir_block_is_reachable(b2))
|
|
||||||
return block_return_if_reachable(b1);
|
|
||||||
|
|
||||||
assert(nir_cf_node_get_function(&b1->cf_node) ==
|
|
||||||
nir_cf_node_get_function(&b2->cf_node));
|
|
||||||
|
|
||||||
assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
|
|
||||||
nir_metadata_dominance);
|
|
||||||
|
|
||||||
return intersect(b1, b2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if parent dominates child according to the following
|
* Returns true if parent dominates child according to the following
|
||||||
* definition:
|
* definition:
|
||||||
|
|
|
||||||
177
src/compiler/nir/nir_dominance_lca.c
Normal file
177
src/compiler/nir/nir_dominance_lca.c
Normal file
|
|
@ -0,0 +1,177 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2025 Valve Corporation
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "nir.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the lowest common ancestor in the dominance tree.
|
||||||
|
*
|
||||||
|
* We reduce the LCA problem to range minimum query using the standard Euler
|
||||||
|
* tour method (see e.g. Bender and Colton section 2). From there, we use the
|
||||||
|
* simple RMQ algorithm that uses O(n log n) preprocessing time and O(1) query
|
||||||
|
* time (Bender and Colton section 3).
|
||||||
|
*
|
||||||
|
* As a slight modification, we store the block index instead of the block
|
||||||
|
* depth. We can do this because the lower tree depth is always at a lower block
|
||||||
|
* index and we use an RMQ algorithm that doesn't rely on the -1/+1 property.
|
||||||
|
*
|
||||||
|
* Bender, M.A., Farach-Colton, M. (2000). The LCA Problem Revisited. In:
|
||||||
|
* Gonnet, G.H., Viola, A. (eds) LATIN 2000: Theoretical Informatics. LATIN
|
||||||
|
* 2000. Lecture Notes in Computer Science, vol 1776. Springer, Berlin,
|
||||||
|
* Heidelberg. https://doi.org/10.1007/10719839_9
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
realloc_info(nir_function_impl *impl)
|
||||||
|
{
|
||||||
|
struct nir_dom_lca_info *info = &impl->dom_lca_info;
|
||||||
|
const uint32_t euler_tour_size = impl->num_blocks * 2 - 1;
|
||||||
|
|
||||||
|
void *mem_ctx = ralloc_parent(impl);
|
||||||
|
range_minimum_query_table_resize(&info->table, mem_ctx, euler_tour_size);
|
||||||
|
info->block_from_idx = reralloc_array_size(mem_ctx, info->block_from_idx,
|
||||||
|
sizeof(nir_block *),
|
||||||
|
impl->num_blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
dom_lca_representative(nir_block *block)
|
||||||
|
{
|
||||||
|
/* The dom_pre_index is 1-indexed so we need to subtract one to match our
|
||||||
|
* indices
|
||||||
|
*/
|
||||||
|
return block->dom_pre_index - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
generate_euler_tour(nir_function_impl *impl)
|
||||||
|
{
|
||||||
|
uint32_t *table = impl->dom_lca_info.table.table;
|
||||||
|
nir_block **block_from_idx = impl->dom_lca_info.block_from_idx;
|
||||||
|
if (impl->num_blocks == 1) {
|
||||||
|
nir_block *block = nir_start_block(impl);
|
||||||
|
table[0] = 0;
|
||||||
|
block_from_idx[0] = block;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* By definition, the first row of the table contains range minimum query
|
||||||
|
* lookups for each single-element block, meaning it is just the array that
|
||||||
|
* we will perform RMQs on. Therefore, when generating the Euler tour, we
|
||||||
|
* store results in the first row and are free to use the rest of the table
|
||||||
|
* as scratch memory for the depth-first search.
|
||||||
|
*
|
||||||
|
* The stack contains the index of the node's next child to visit.
|
||||||
|
*/
|
||||||
|
assert(impl->dom_lca_info.table.height >= 2);
|
||||||
|
STATIC_ASSERT(sizeof(uint32_t) <= sizeof(nir_block *));
|
||||||
|
uint32_t *dfs_stack = (uint32_t *)&table[impl->dom_lca_info.table.width];
|
||||||
|
|
||||||
|
nir_block *cur_block = nir_start_block(impl);
|
||||||
|
uint32_t *cur_stack = dfs_stack;
|
||||||
|
|
||||||
|
bool first_visit = true;
|
||||||
|
uint32_t i;
|
||||||
|
for (i = 0; i < impl->dom_lca_info.table.width; i++) {
|
||||||
|
if (cur_block == NULL) {
|
||||||
|
/* This can happen earlier than expected if some blocks are
|
||||||
|
* unreachable
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(cur_stack >= dfs_stack);
|
||||||
|
table[i] = cur_block->index;
|
||||||
|
|
||||||
|
if (first_visit) {
|
||||||
|
/* First visit. Place it on the stack. */
|
||||||
|
*cur_stack = 0;
|
||||||
|
assert(i == dom_lca_representative(cur_block));
|
||||||
|
block_from_idx[cur_block->index] = cur_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*cur_stack < cur_block->num_dom_children) {
|
||||||
|
cur_block = cur_block->dom_children[*cur_stack];
|
||||||
|
*cur_stack += 1;
|
||||||
|
cur_stack += 1;
|
||||||
|
first_visit = true;
|
||||||
|
} else {
|
||||||
|
assert(*cur_stack == cur_block->num_dom_children);
|
||||||
|
cur_block = cur_block->imm_dom;
|
||||||
|
cur_stack -= 1;
|
||||||
|
first_visit = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(cur_block == NULL);
|
||||||
|
|
||||||
|
if (i != impl->dom_lca_info.table.width) {
|
||||||
|
void *mem_ctx = ralloc_parent(impl);
|
||||||
|
range_minimum_query_table_resize(&impl->dom_lca_info.table, mem_ctx, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
nir_calc_dominance_lca_impl(nir_function_impl *impl)
|
||||||
|
{
|
||||||
|
if (impl->valid_metadata & nir_metadata_dominance_lca)
|
||||||
|
return;
|
||||||
|
|
||||||
|
nir_metadata_require(impl, nir_metadata_block_index |
|
||||||
|
nir_metadata_dominance);
|
||||||
|
|
||||||
|
realloc_info(impl);
|
||||||
|
generate_euler_tour(impl);
|
||||||
|
range_minimum_query_table_preprocess(&impl->dom_lca_info.table);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns b when it is non-NULL and reachable, and NULL otherwise.
 */
static nir_block *
block_return_if_reachable(nir_block *b)
{
   if (b == NULL)
      return NULL;

   if (!nir_block_is_reachable(b))
      return NULL;

   return b;
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_lca(nir_block *result, nir_block *b1, nir_block *b2)
|
||||||
|
{
|
||||||
|
if (!nir_block_dominates(result, b1) || !nir_block_dominates(result, b2))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (int i = 0; i < result->num_dom_children; i++) {
|
||||||
|
nir_block *child = result->dom_children[i];
|
||||||
|
if (nir_block_dominates(child, b1) &&
|
||||||
|
nir_block_dominates(child, b2))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the lowest common ancestor of b1 and b2 in the dominance tree.
 *
 * When one block is NULL or unreachable, the other block is returned if it
 * is reachable, and NULL otherwise. Both blocks must belong to the same
 * function implementation, which must have nir_metadata_dominance_lca valid.
 */
nir_block *
nir_dominance_lca(nir_block *b1, nir_block *b2)
{
   const bool b1_usable = b1 != NULL && nir_block_is_reachable(b1);
   if (!b1_usable)
      return block_return_if_reachable(b2);

   const bool b2_usable = b2 != NULL && nir_block_is_reachable(b2);
   if (!b2_usable)
      return block_return_if_reachable(b1);

   nir_function_impl *impl = nir_cf_node_get_function(&b1->cf_node);
   assert(impl == nir_cf_node_get_function(&b2->cf_node));
   assert(impl->valid_metadata & nir_metadata_dominance_lca);

   /* Order the two representatives so the query range runs low to high. */
   uint32_t lo = dom_lca_representative(b1);
   uint32_t hi = dom_lca_representative(b2);
   if (lo > hi) {
      const uint32_t tmp = lo;
      lo = hi;
      hi = tmp;
   }

   /* The end of the range is passed as hi + 1 (presumably an exclusive
    * bound -- confirm against range_minimum_query).
    */
   const uint32_t min_index =
      range_minimum_query(&impl->dom_lca_info.table, lo, hi + 1);
   nir_block *lca = impl->dom_lca_info.block_from_idx[min_index];

   assert(is_lca(lca, b1, b2));

   return lca;
}
|
||||||
|
|
@ -38,6 +38,8 @@ nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...)
|
||||||
nir_index_instrs(impl);
|
nir_index_instrs(impl);
|
||||||
if (NEEDS_UPDATE(nir_metadata_dominance))
|
if (NEEDS_UPDATE(nir_metadata_dominance))
|
||||||
nir_calc_dominance_impl(impl);
|
nir_calc_dominance_impl(impl);
|
||||||
|
if (NEEDS_UPDATE(nir_metadata_dominance_lca))
|
||||||
|
nir_calc_dominance_lca_impl(impl);
|
||||||
if (NEEDS_UPDATE(nir_metadata_live_defs))
|
if (NEEDS_UPDATE(nir_metadata_live_defs))
|
||||||
nir_live_defs_impl(impl);
|
nir_live_defs_impl(impl);
|
||||||
if (NEEDS_UPDATE(nir_metadata_divergence))
|
if (NEEDS_UPDATE(nir_metadata_divergence))
|
||||||
|
|
@ -73,6 +75,9 @@ nir_progress(bool progress, nir_function_impl *impl, nir_metadata preserved)
|
||||||
if (!progress)
|
if (!progress)
|
||||||
preserved = nir_metadata_all;
|
preserved = nir_metadata_all;
|
||||||
|
|
||||||
|
if (!(preserved & nir_metadata_dominance))
|
||||||
|
assert(!(preserved & nir_metadata_dominance_lca));
|
||||||
|
|
||||||
/* If we discard valid liveness information, immediately free the
|
/* If we discard valid liveness information, immediately free the
|
||||||
* liveness information for each block. For large shaders, it can
|
* liveness information for each block. For large shaders, it can
|
||||||
* consume a huge amount of memory, and it's usually not immediately
|
* consume a huge amount of memory, and it's usually not immediately
|
||||||
|
|
|
||||||
|
|
@ -796,8 +796,9 @@ weak_gvn(const nir_instr *a, const nir_instr *b)
|
||||||
static bool
|
static bool
|
||||||
opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
|
opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
|
||||||
{
|
{
|
||||||
nir_metadata_require(impl,
|
nir_metadata_require(impl, nir_metadata_block_index |
|
||||||
nir_metadata_block_index | nir_metadata_dominance);
|
nir_metadata_dominance |
|
||||||
|
nir_metadata_dominance_lca);
|
||||||
nir_metadata_require(impl, nir_metadata_loop_analysis,
|
nir_metadata_require(impl, nir_metadata_loop_analysis,
|
||||||
shader->options->force_indirect_unrolling,
|
shader->options->force_indirect_unrolling,
|
||||||
shader->options->force_indirect_unrolling_sampler);
|
shader->options->force_indirect_unrolling_sampler);
|
||||||
|
|
|
||||||
|
|
@ -361,11 +361,12 @@ nir_opt_sink(nir_shader *shader, nir_move_options options)
|
||||||
bool progress = false;
|
bool progress = false;
|
||||||
|
|
||||||
nir_foreach_function_impl(impl, shader) {
|
nir_foreach_function_impl(impl, shader) {
|
||||||
nir_metadata_require(impl,
|
nir_metadata required = nir_metadata_block_index |
|
||||||
nir_metadata_block_index | nir_metadata_dominance |
|
nir_metadata_dominance |
|
||||||
(options & (nir_move_only_convergent |
|
nir_metadata_dominance_lca;
|
||||||
nir_move_only_divergent) ?
|
if (options & (nir_move_only_convergent | nir_move_only_divergent))
|
||||||
nir_metadata_divergence : 0));
|
required |= nir_metadata_divergence;
|
||||||
|
nir_metadata_require(impl, required);
|
||||||
|
|
||||||
nir_foreach_block_reverse(block, impl) {
|
nir_foreach_block_reverse(block, impl) {
|
||||||
nir_foreach_instr_reverse_safe(instr, block) {
|
nir_foreach_instr_reverse_safe(instr, block) {
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,8 @@ brw_nir_lower_rt_intrinsics_pre_trace(nir_shader *nir)
|
||||||
|
|
||||||
if (intrinsics->entries > 0) {
|
if (intrinsics->entries > 0) {
|
||||||
nir_foreach_function_with_impl(func, impl, nir) {
|
nir_foreach_function_with_impl(func, impl, nir) {
|
||||||
nir_metadata_require(impl, nir_metadata_dominance);
|
nir_metadata_require(impl, nir_metadata_dominance |
|
||||||
|
nir_metadata_dominance_lca);
|
||||||
|
|
||||||
/* Going in reverse order of blocks, move the intrinsics gather above
|
/* Going in reverse order of blocks, move the intrinsics gather above
|
||||||
* in the LCA block to trace calls.
|
* in the LCA block to trace calls.
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue