mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
nir/sink: Rewrite loop handling logic
Previously, for code like:
loop {
loop {
a = load_ubo()
}
use(a)
}
adjust_block_for_loops() would return the block before the first loop.
Now we compute the range of allowed blocks and then walk the dominance
tree directly, guaranteeing directly that we always choose a block that
dominates all the uses and is dominated by the definition.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
(cherry picked from commit af9296b8c0)
This commit is contained in:
parent
0beee2f723
commit
d14d70de2f
1 changed files with 40 additions and 35 deletions
|
|
@ -76,23 +76,36 @@ get_innermost_loop(nir_cf_node *node)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* return last block not after use_block with def_loop as it's innermost loop */
|
||||
static nir_block *
|
||||
adjust_block_for_loops(nir_block *use_block, nir_loop *def_loop)
|
||||
static bool
|
||||
loop_contains_block(nir_loop *loop, nir_block *block)
|
||||
{
|
||||
nir_loop *use_loop = NULL;
|
||||
nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
|
||||
nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
||||
|
||||
for (nir_cf_node *node = &use_block->cf_node; node != NULL; node = node->parent) {
|
||||
if (def_loop && node == &def_loop->cf_node)
|
||||
break;
|
||||
if (node->type == nir_cf_node_loop)
|
||||
use_loop = nir_cf_node_as_loop(node);
|
||||
}
|
||||
if (use_loop) {
|
||||
return nir_block_cf_tree_prev(nir_loop_first_block(use_loop));
|
||||
} else {
|
||||
return use_block;
|
||||
return block->index > before->index && block->index < after->index;
|
||||
}
|
||||
|
||||
/* Given the LCA of all uses and the definition, find a block on the path
|
||||
* between them in the dominance tree that is outside of as many loops as
|
||||
* possible.
|
||||
*/
|
||||
|
||||
static nir_block *
|
||||
adjust_block_for_loops(nir_block *use_block, nir_block *def_block)
|
||||
{
|
||||
for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
|
||||
cur_block = cur_block->imm_dom) {
|
||||
nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
|
||||
if (next && next->type == nir_cf_node_loop) {
|
||||
nir_loop *following_loop = nir_cf_node_as_loop(next);
|
||||
if (loop_contains_block(following_loop, use_block)) {
|
||||
use_block = cur_block;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return use_block;
|
||||
}
|
||||
|
||||
/* iterate a ssa def's use's and try to find a more optimal block to
|
||||
|
|
@ -106,10 +119,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
|
|||
{
|
||||
nir_block *lca = NULL;
|
||||
|
||||
nir_loop *def_loop = NULL;
|
||||
if (!sink_into_loops)
|
||||
def_loop = get_innermost_loop(&def->parent_instr->block->cf_node);
|
||||
|
||||
nir_foreach_use(use, def) {
|
||||
nir_instr *instr = use->parent_instr;
|
||||
nir_block *use_block = instr->block;
|
||||
|
|
@ -131,18 +140,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
|
|||
use_block = phi_lca;
|
||||
}
|
||||
|
||||
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
|
||||
* sink it down into loops, which may result in accessing memory or shared
|
||||
* functions multiple times. Sink it just above the start of the loop
|
||||
* where it's used. For load_consts, undefs, and comparisons, we expect
|
||||
* the driver to be able to emit them as simple ALU ops, so sinking as far
|
||||
* in as we can go is probably worth it for register pressure.
|
||||
*/
|
||||
if (!sink_into_loops) {
|
||||
use_block = adjust_block_for_loops(use_block, def_loop);
|
||||
assert(nir_block_dominates(def->parent_instr->block, use_block));
|
||||
}
|
||||
|
||||
lca = nir_dominance_lca(lca, use_block);
|
||||
}
|
||||
|
||||
|
|
@ -150,14 +147,22 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
|
|||
nir_block *use_block =
|
||||
nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
|
||||
|
||||
if (!sink_into_loops) {
|
||||
use_block = adjust_block_for_loops(use_block, def_loop);
|
||||
assert(nir_block_dominates(def->parent_instr->block, use_block));
|
||||
}
|
||||
|
||||
lca = nir_dominance_lca(lca, use_block);
|
||||
}
|
||||
|
||||
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
|
||||
* sink it down into loops, which may result in accessing memory or shared
|
||||
* functions multiple times. Sink it just above the start of the loop
|
||||
* where it's used. For load_consts, undefs, and comparisons, we expect
|
||||
* the driver to be able to emit them as simple ALU ops, so sinking as far
|
||||
* in as we can go is probably worth it for register pressure.
|
||||
*/
|
||||
if (!sink_into_loops) {
|
||||
lca = adjust_block_for_loops(lca, def->parent_instr->block);
|
||||
assert(nir_block_dominates(def->parent_instr->block, lca));
|
||||
}
|
||||
|
||||
|
||||
return lca;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue