From d91e634c1306d805e913ec0f8981234c2587c41c Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 11 Jul 2024 12:53:19 +0100
Subject: [PATCH] aco: calculate indices from dominance tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This lets us do fast dominance checks.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_dominance.cpp | 77 ++++++++++++++++++++++++++++++
 src/amd/compiler/aco_ir.h          | 23 +++++++++
 2 files changed, 100 insertions(+)

diff --git a/src/amd/compiler/aco_dominance.cpp b/src/amd/compiler/aco_dominance.cpp
index 8918b415b85..6c97fcb7378 100644
--- a/src/amd/compiler/aco_dominance.cpp
+++ b/src/amd/compiler/aco_dominance.cpp
@@ -20,6 +20,81 @@
 
 namespace aco {
 
+namespace {
+
+struct block_dom_info {
+   uint32_t logical_descendants = 0;
+   uint32_t linear_descendants = 0;
+   uint32_t logical_depth = 0;
+   uint32_t linear_depth = 0;
+   small_vec<uint32_t, 4> logical_children;
+   small_vec<uint32_t, 4> linear_children;
+};
+
+void
+calc_indices(Program* program)
+{
+   std::vector<block_dom_info> info(program->blocks.size());
+
+   /* Create the linear and logical dominance trees. Calculating logical_descendants and
+    * linear_descendants requires no recursion because the immediate dominator of each block has a
+    * lower index. */
+   for (int i = program->blocks.size() - 1; i >= 0; i--) {
+      Block& block = program->blocks[i];
+
+      /* Add this as a child node of the parent. */
+      if (block.logical_idom != i && block.logical_idom != -1) {
+         assert(i > block.logical_idom);
+         info[block.logical_idom].logical_children.push_back(i);
+         /* Add this node's descendants and itself to the parent. */
+         info[block.logical_idom].logical_descendants += info[i].logical_descendants + 1;
+      }
+      if (block.linear_idom != i) {
+         assert(i > block.linear_idom);
+         info[block.linear_idom].linear_children.push_back(i);
+         info[block.linear_idom].linear_descendants += info[i].linear_descendants + 1;
+      }
+   }
+
+   /* Fill in the indices that would be obtained in a preorder and postorder traversal of the
+    * dominance trees. */
+   for (unsigned i = 0; i < program->blocks.size(); i++) {
+      Block& block = program->blocks[i];
+      /* Because of block_kind_resume, the root node's indices start at the block index to avoid
+       * reusing indices. */
+      if (block.logical_idom == (int)i)
+         block.logical_dom_pre_index = i;
+      if (block.linear_idom == (int)i)
+         block.linear_dom_pre_index = i;
+
+      /* Visit each child and assign its preorder indices and depth. */
+      unsigned start = block.logical_dom_pre_index + 1;
+      for (unsigned j = 0; j < info[i].logical_children.size(); j++) {
+         unsigned child = info[i].logical_children[j];
+         info[child].logical_depth = info[i].logical_depth + 1;
+         program->blocks[child].logical_dom_pre_index = start;
+         start += info[child].logical_descendants + 1;
+      }
+      start = block.linear_dom_pre_index + 1;
+      for (unsigned j = 0; j < info[i].linear_children.size(); j++) {
+         unsigned child = info[i].linear_children[j];
+         info[child].linear_depth = info[i].linear_depth + 1;
+         program->blocks[child].linear_dom_pre_index = start;
+         start += info[child].linear_descendants + 1;
+      }
+
+      /* The postorder traversal is the same as the preorder traversal, except that when this block
+       * is visited, we haven't visited its ancestors and have already visited its descendants.
+       * This means that the postorder_index is preorder_index-depth+descendants. */
+      block.logical_dom_post_index =
+         block.logical_dom_pre_index - info[i].logical_depth + info[i].logical_descendants;
+      block.linear_dom_post_index =
+         block.linear_dom_pre_index - info[i].linear_depth + info[i].linear_descendants;
+   }
+}
+
+} /* end namespace */
+
 void
 dominator_tree(Program* program)
 {
@@ -72,6 +147,8 @@ dominator_tree(Program* program)
       block.logical_idom = new_logical_idom;
       block.linear_idom = new_linear_idom;
    }
+
+   calc_indices(program);
 }
 
 } // namespace aco
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index df7232b0d46..eedd38e14ad 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1891,6 +1891,15 @@ struct Block {
    uint32_t kind = 0;
    int32_t logical_idom = -1;
    int32_t linear_idom = -1;
+
+   /* Preorder and postorder traversal indices of the dominance tree. Because a program can have
+    * several dominance trees (because of block_kind_resume), these start at the block index of the
+    * root node. */
+   uint32_t logical_dom_pre_index = 0;
+   uint32_t logical_dom_post_index = 0;
+   uint32_t linear_dom_pre_index = 0;
+   uint32_t linear_dom_post_index = 0;
+
    uint16_t loop_nest_depth = 0;
    uint16_t divergent_if_logical_depth = 0;
    uint16_t uniform_if_depth = 0;
@@ -2245,6 +2254,20 @@ uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
 
 bool uses_scratch(Program* program);
 
+inline bool
+dominates_logical(const Block& parent, const Block& child)
+{
+   return child.logical_dom_pre_index >= parent.logical_dom_pre_index &&
+          child.logical_dom_post_index <= parent.logical_dom_post_index;
+}
+
+inline bool
+dominates_linear(const Block& parent, const Block& child)
+{
+   return child.linear_dom_pre_index >= parent.linear_dom_pre_index &&
+          child.linear_dom_post_index <= parent.linear_dom_post_index;
+}
+
 typedef struct {
    const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
    const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];