nir/sink: Rewrite loop handling logic

Previously, for code like:
loop {
    loop {
        a = load_ubo()
    }
    use(a)
}
adjust_block_for_loops() would return the block before the first loop.
Now we compute the range of allowed blocks and then walk the dominance
tree directly, guaranteeing directly that we always choose a block that
dominates all the uses and is dominated by the definition.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
(cherry picked from commit af9296b8c0)
This commit is contained in:
Connor Abbott 2019-09-25 14:02:48 +02:00 committed by Dylan Baker
parent 0beee2f723
commit d14d70de2f

View file

@ -76,23 +76,36 @@ get_innermost_loop(nir_cf_node *node)
return NULL;
}
/* return last block not after use_block with def_loop as it's innermost loop */
static nir_block *
adjust_block_for_loops(nir_block *use_block, nir_loop *def_loop)
static bool
loop_contains_block(nir_loop *loop, nir_block *block)
{
nir_loop *use_loop = NULL;
nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
for (nir_cf_node *node = &use_block->cf_node; node != NULL; node = node->parent) {
if (def_loop && node == &def_loop->cf_node)
break;
if (node->type == nir_cf_node_loop)
use_loop = nir_cf_node_as_loop(node);
}
if (use_loop) {
return nir_block_cf_tree_prev(nir_loop_first_block(use_loop));
} else {
return use_block;
return block->index > before->index && block->index < after->index;
}
/* Given the LCA of all uses and the definition, find a block on the path
* between them in the dominance tree that is outside of as many loops as
* possible.
*/
static nir_block *
adjust_block_for_loops(nir_block *use_block, nir_block *def_block)
{
for (nir_block *cur_block = use_block; cur_block != def_block->imm_dom;
cur_block = cur_block->imm_dom) {
nir_cf_node *next = nir_cf_node_next(&cur_block->cf_node);
if (next && next->type == nir_cf_node_loop) {
nir_loop *following_loop = nir_cf_node_as_loop(next);
if (loop_contains_block(following_loop, use_block)) {
use_block = cur_block;
continue;
}
}
}
return use_block;
}
/* iterate a ssa def's use's and try to find a more optimal block to
@ -106,10 +119,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
{
nir_block *lca = NULL;
nir_loop *def_loop = NULL;
if (!sink_into_loops)
def_loop = get_innermost_loop(&def->parent_instr->block->cf_node);
nir_foreach_use(use, def) {
nir_instr *instr = use->parent_instr;
nir_block *use_block = instr->block;
@ -131,18 +140,6 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
use_block = phi_lca;
}
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
* sink it down into loops, which may result in accessing memory or shared
* functions multiple times. Sink it just above the start of the loop
* where it's used. For load_consts, undefs, and comparisons, we expect
* the driver to be able to emit them as simple ALU ops, so sinking as far
* in as we can go is probably worth it for register pressure.
*/
if (!sink_into_loops) {
use_block = adjust_block_for_loops(use_block, def_loop);
assert(nir_block_dominates(def->parent_instr->block, use_block));
}
lca = nir_dominance_lca(lca, use_block);
}
@ -150,14 +147,22 @@ get_preferred_block(nir_ssa_def *def, bool sink_into_loops)
nir_block *use_block =
nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
if (!sink_into_loops) {
use_block = adjust_block_for_loops(use_block, def_loop);
assert(nir_block_dominates(def->parent_instr->block, use_block));
}
lca = nir_dominance_lca(lca, use_block);
}
/* If we're moving a load_ubo or load_interpolated_input, we don't want to
* sink it down into loops, which may result in accessing memory or shared
* functions multiple times. Sink it just above the start of the loop
* where it's used. For load_consts, undefs, and comparisons, we expect
* the driver to be able to emit them as simple ALU ops, so sinking as far
* in as we can go is probably worth it for register pressure.
*/
if (!sink_into_loops) {
lca = adjust_block_for_loops(lca, def->parent_instr->block);
assert(nir_block_dominates(def->parent_instr->block, lca));
}
return lca;
}