From 6313e9f5497be4b7deec69c3605aee2efb8edb65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 21 Feb 2025 11:27:15 +0100 Subject: [PATCH] nir/opt_loop: Relax restrictions on opt_loop_peel_initial_break() for more loops In addition to loops where the break condition can be constant-folded, we also allow peeling the initial break from loops which have at least one phi with a constant loop-carried source, effectively removing that phi from the loop. Totals from 172 (0.22% of 79377) affected shaders: (Navi31) Instrs: 372798 -> 369181 (-0.97%); split: -1.07%, +0.10% CodeSize: 1907312 -> 1891948 (-0.81%); split: -0.89%, +0.09% VGPRs: 8436 -> 8460 (+0.28%) Latency: 3646016 -> 3396657 (-6.84%) InvThroughput: 434848 -> 389079 (-10.53%) Copies: 28436 -> 27118 (-4.63%); split: -4.79%, +0.15% Branches: 26504 -> 25344 (-4.38%); split: -4.44%, +0.06% PreSGPRs: 8585 -> 8603 (+0.21%) VALU: 148291 -> 148355 (+0.04%); split: -0.01%, +0.06% SALU: 95625 -> 92649 (-3.11%); split: -3.22%, +0.11% Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/nir_opt_loop.c | 26 ++++++++++++++----- .../drivers/d3d12/ci/d3d12-quick_shader.txt | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/compiler/nir/nir_opt_loop.c b/src/compiler/nir/nir_opt_loop.c index d24776f7720..10e515db00e 100644 --- a/src/compiler/nir/nir_opt_loop.c +++ b/src/compiler/nir/nir_opt_loop.c @@ -327,6 +327,18 @@ can_constant_fold(nir_scalar scalar, nir_block *loop_header) return false; } +static bool +has_phi_with_constant_src(nir_block *block, nir_block *pred) +{ + nir_foreach_phi(phi, block) { + nir_phi_src *src = nir_phi_get_src_from_block(phi, pred); + if (nir_src_is_const(src->src)) + return true; + } + + return false; +} + /** * This optimization tries to peel the first loop break. 
* @@ -387,17 +399,19 @@ opt_loop_peel_initial_break(nir_loop *loop) if (nir_block_ends_in_jump(nir_loop_last_block(loop))) return false; - /* Check that there is actual work to be done after the initial break. */ - if (!nir_block_contains_work(nir_cf_node_cf_tree_next(if_node))) - return false; - /* For now, we restrict this optimization to cases where the outer IF - * can be constant-folded. + * can be constant-folded or where at least one phi at the loop-header + * has a constant loop-carried source. If it can be constant-folded, + * we additionally require that there is actual work to be done after + * the initial break. This is to avoid unconditionally unrolling long + * loops. * * Note: If this restriction is lifted, it might recurse infinitely. * Prevent by e.g. restricting to single-exit loops. */ - if (!can_constant_fold(nir_get_scalar(nif->condition.ssa, 0), header_block)) + if (!has_phi_with_constant_src(header_block, nir_loop_last_block(loop)) && + (!nir_block_contains_work(nir_cf_node_cf_tree_next(if_node)) || + !can_constant_fold(nir_get_scalar(nif->condition.ssa, 0), header_block))) return false; /* Even though this if statement has a jump on one side, we may still have diff --git a/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt b/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt index 5d1a3e63db8..bf2e2118232 100644 --- a/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt +++ b/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt @@ -12,6 +12,7 @@ spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail spec@glsl-1.50@execution@primitive-id-no-gs-point,Fail spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Crash +shaders@ssa@fs-if-def-else-break,Fail # These tests use a TCS output variable only as temporary storage. Since the output # is unused by the TES, we remove it.