From 3dcba87ca3213ea12d14abacf0cc317b081e57f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 4 Dec 2025 00:21:59 -0500 Subject: [PATCH] nir/opt_licm: hoist instructions across multiple levels of nested loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit radv gfx12: Totals: Instrs: 42861311 -> 42861476 (+0.00%); split: -0.00%, +0.00% CodeSize: 227917476 -> 227918160 (+0.00%); split: -0.00%, +0.00% Latency: 265381068 -> 265373506 (-0.00%); split: -0.00%, +0.00% InvThroughput: 42954018 -> 42952350 (-0.00%) VClause: 819026 -> 819024 (-0.00%) SClause: 1210348 -> 1210293 (-0.00%) Copies: 2919525 -> 2919597 (+0.00%); split: -0.00%, +0.00% PreSGPRs: 2889432 -> 2889406 (-0.00%) VALU: 23757371 -> 23757377 (+0.00%); split: -0.00%, +0.00% SALU: 5981417 -> 5981485 (+0.00%); split: -0.00%, +0.00% VOPD: 8966 -> 8964 (-0.02%) Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_opt_licm.c | 52 +++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/src/compiler/nir/nir_opt_licm.c b/src/compiler/nir/nir_opt_licm.c index ec13ee9b689..0178b58fd29 100644 --- a/src/compiler/nir/nir_opt_licm.c +++ b/src/compiler/nir/nir_opt_licm.c @@ -100,6 +100,29 @@ visit_cf_list(struct exec_list *list, licm_state *state) foreach_list_typed(nir_cf_node, node, node, list) { switch (node->type) { case nir_cf_node_block: { + nir_cf_node *next = nir_cf_node_next(node); + bool optimize_loop = false; + + /* If the next CF node is a loop that we optimize, visit it first + * before visiting its predecessor block, so that any instructions + * hoisted from this (potentially nested) loop are then considered + * for hoisting from the outer loop as well. The goal is to hoist + * instructions across all levels of nested loops. + */ + if (next && next->type == nir_cf_node_loop) { + nir_loop *inner_loop = nir_cf_node_as_loop(next); + optimize_loop = should_optimize_loop(inner_loop); + + if (optimize_loop) { + nir_loop *outer_loop = state->loop; + + state->loop = inner_loop; + progress |= visit_cf_list(&inner_loop->body, state); + progress |= visit_cf_list(&inner_loop->continue_list, state); + state->loop = outer_loop; + } + } + /* By only visiting blocks which dominate the block after the loop, * we ensure that we don't speculatively hoist any instructions * which otherwise might not be executed. @@ -111,6 +134,17 @@ visit_cf_list(struct exec_list *list, licm_state *state) if (state->loop && nir_block_dominates(block, nir_loop_successor_block(state->loop))) progress |= visit_block(block, state); + + if (next && next->type == nir_cf_node_loop && !optimize_loop) { + nir_loop *loop = nir_cf_node_as_loop(next); + + /* We treat this loop like any other block, so we don't do LICM + * from it per se, but if this loop is nested inside another + * loop, we still do LICM for the outer loop. + */ + progress |= visit_cf_list(&loop->body, state); + progress |= visit_cf_list(&loop->continue_list, state); + } break; } case nir_cf_node_if: { @@ -119,23 +153,9 @@ visit_cf_list(struct exec_list *list, licm_state *state) progress |= visit_cf_list(&nif->else_list, state); break; } - case nir_cf_node_loop: { - nir_loop *inner_loop = nir_cf_node_as_loop(node); - nir_loop *outer_loop = state->loop; - - /* If we don't optimize this loop, we treat it like a block, so we - * don't do LICM from it per se, but if this loop is nested inside - * another loop that's optimized, we still do LICM from this CF list - * for the outer loop. - */ - if (should_optimize_loop(inner_loop)) - state->loop = inner_loop; - - progress |= visit_cf_list(&inner_loop->body, state); - progress |= visit_cf_list(&inner_loop->continue_list, state); - state->loop = outer_loop; + case nir_cf_node_loop: + /* All loops are handled when handling their predecessor block. */ break; - } case nir_cf_node_function: UNREACHABLE("NIR LICM: Unsupported cf_node type."); }