nir/opt_licm: add a private state structure for the pass

The structure will grow in later commits.

The major change is that the preheader and exit blocks are replaced
by tracking just the innermost optimized nir_loop * and getting the
predecessor and successor blocks out of it.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41220>
This commit is contained in:
Marek Olšák 2025-12-03 23:00:48 -05:00 committed by Marge Bot
parent 90c4f1f9dc
commit e0112be522
2 changed files with 59 additions and 24 deletions

View file

@ -4970,6 +4970,18 @@ nir_block *nir_cf_node_cf_tree_prev(nir_cf_node *node);
block != nir_cf_node_cf_tree_prev(node); \
block = prev, prev = nir_block_cf_tree_prev(block))
static inline nir_block *
nir_loop_predecessor_block(nir_loop *loop)
{
return nir_cf_node_cf_tree_prev(&loop->cf_node);
}
static inline nir_block *
nir_loop_successor_block(nir_loop *loop)
{
return nir_cf_node_cf_tree_next(&loop->cf_node);
}
/* If the following CF node is an if, this function returns that if.
* Otherwise, it returns NULL.
*/

View file

@ -5,15 +5,24 @@
#include "nir.h"
typedef struct {
nir_loop *loop;
} licm_state;
static bool
defined_before_loop(nir_src *src, void *state)
defined_before_loop(nir_src *src, void *_state)
{
unsigned *loop_preheader_idx = state;
return nir_def_block(src->ssa)->index <= *loop_preheader_idx;
licm_state *state = (licm_state *)_state;
/* The current instruction is loop-invariant only if its sources are before
* the loop.
*/
return nir_def_block(src->ssa)->index <=
nir_loop_predecessor_block(state->loop)->index;
}
static bool
is_instr_loop_invariant(nir_instr *instr, unsigned loop_preheader_idx)
is_instr_loop_invariant(nir_instr *instr, licm_state *state)
{
switch (instr->type) {
case nir_instr_type_load_const:
@ -28,7 +37,7 @@ is_instr_loop_invariant(nir_instr *instr, unsigned loop_preheader_idx)
case nir_instr_type_alu:
case nir_instr_type_tex:
case nir_instr_type_deref:
return nir_foreach_src(instr, defined_before_loop, &loop_preheader_idx);
return nir_foreach_src(instr, defined_before_loop, state);
case nir_instr_type_phi:
case nir_instr_type_call:
@ -40,13 +49,16 @@ is_instr_loop_invariant(nir_instr *instr, unsigned loop_preheader_idx)
}
static bool
visit_block(nir_block *block, nir_block *preheader)
visit_block(nir_block *block, licm_state *state)
{
assert(state->loop);
bool progress = false;
nir_foreach_instr_safe(instr, block) {
if (is_instr_loop_invariant(instr, preheader->index)) {
if (is_instr_loop_invariant(instr, state)) {
nir_instr_remove(instr);
nir_instr_insert_after_block(preheader, instr);
nir_instr_insert_after_block(nir_loop_predecessor_block(state->loop),
instr);
progress = true;
}
}
@ -81,38 +93,47 @@ should_optimize_loop(nir_loop *loop)
}
static bool
visit_cf_list(struct exec_list *list, nir_block *preheader, nir_block *exit)
visit_cf_list(struct exec_list *list, licm_state *state)
{
bool progress = false;
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
case nir_cf_node_block: {
/* By only visiting blocks which dominate the loop exit, we
* ensure that we don't speculatively hoist any instructions
/* By only visiting blocks which dominate the block after the loop,
* we ensure that we don't speculatively hoist any instructions
* which otherwise might not be executed.
*
* Note, that the proper check would be whether this block
* postdominates the loop preheader.
* postdominates the block before the loop.
*/
nir_block *block = nir_cf_node_as_block(node);
if (exit && nir_block_dominates(block, exit))
progress |= visit_block(block, preheader);
if (state->loop &&
nir_block_dominates(block, nir_loop_successor_block(state->loop)))
progress |= visit_block(block, state);
break;
}
case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(node);
progress |= visit_cf_list(&nif->then_list, preheader, exit);
progress |= visit_cf_list(&nif->else_list, preheader, exit);
progress |= visit_cf_list(&nif->then_list, state);
progress |= visit_cf_list(&nif->else_list, state);
break;
}
case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(node);
bool opt = should_optimize_loop(loop);
nir_block *inner_preheader = opt ? nir_cf_node_cf_tree_prev(node) : preheader;
nir_block *inner_exit = opt ? nir_cf_node_cf_tree_next(node) : exit;
progress |= visit_cf_list(&loop->body, inner_preheader, inner_exit);
progress |= visit_cf_list(&loop->continue_list, inner_preheader, inner_exit);
nir_loop *inner_loop = nir_cf_node_as_loop(node);
nir_loop *outer_loop = state->loop;
/* If we don't optimize this loop, we treat it like a block, so we
* don't do LICM from it per se, but if this loop is nested inside
* another loop that's optimized, we still do LICM from this CF list
* for the outer loop.
*/
if (should_optimize_loop(inner_loop))
state->loop = inner_loop;
progress |= visit_cf_list(&inner_loop->body, state);
progress |= visit_cf_list(&inner_loop->continue_list, state);
state->loop = outer_loop;
break;
}
case nir_cf_node_function:
@ -126,14 +147,16 @@ visit_cf_list(struct exec_list *list, nir_block *preheader, nir_block *exit)
bool
nir_opt_licm(nir_shader *shader)
{
licm_state state = {0};
bool progress = false;
nir_foreach_function_impl(impl, shader) {
nir_metadata_require(impl, nir_metadata_block_index |
nir_metadata_dominance);
bool impl_progress = visit_cf_list(&impl->body, NULL, NULL);
progress |= nir_progress(impl_progress, impl,
state.loop = NULL;
progress |= nir_progress(visit_cf_list(&impl->body, &state), impl,
nir_metadata_block_index | nir_metadata_dominance);
}