From 4997d8fb1b2acf1b084b10ac08aa889f09c044fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 25 Nov 2025 11:18:37 +0100 Subject: [PATCH] nir/loop_analyze: determine for all ALU whether it can be constant-folded Totals from 16 (0.02% of 79839) affected shaders: (Navi48) MaxWaves: 512 -> 464 (-9.38%) Instrs: 11821 -> 17205 (+45.55%) CodeSize: 60536 -> 86644 (+43.13%) VGPRs: 732 -> 804 (+9.84%) Latency: 68411 -> 39349 (-42.48%) InvThroughput: 14217 -> 9306 (-34.54%) VClause: 223 -> 302 (+35.43%) SClause: 262 -> 317 (+20.99%) Copies: 961 -> 696 (-27.58%); split: -39.23%, +11.65% Branches: 182 -> 158 (-13.19%); split: -29.67%, +16.48% PreSGPRs: 1210 -> 945 (-21.90%); split: -29.42%, +7.52% PreVGPRs: 647 -> 633 (-2.16%) VALU: 5112 -> 10857 (+112.38%) SALU: 3215 -> 2335 (-27.37%); split: -30.67%, +3.30% VMEM: 228 -> 349 (+53.07%) SMEM: 567 -> 549 (-3.17%); split: -3.70%, +0.53% Part-of: --- src/compiler/nir/nir_loop_analyze.c | 109 +++++++++++++--------- src/freedreno/ci/freedreno-a6xx-skips.txt | 4 + 2 files changed, 71 insertions(+), 42 deletions(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index b6576c80270..641a798f242 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -48,21 +48,19 @@ get_loop_var(nir_def *value, loop_info_state *state) return NULL; } -/* If a condition is a comparision between a constant and - * a basic induction variable we know that it will be eliminated once - * the loop is unrolled. +/* If an instruction only depends on basic induction variables + * and constants, we know that it will be eliminated once the + * loop is unrolled. 
*/ static bool -condition_can_constant_fold(loop_info_state *state, nir_scalar cond_scalar) +is_const_after_unrolling(loop_info_state *state, nir_def *def) { - nir_scalar lhs = nir_scalar_chase_alu_src(cond_scalar, 0); - nir_scalar rhs = nir_scalar_chase_alu_src(cond_scalar, 1); + nir_instr *instr = nir_def_instr(def); + if (instr->pass_flags == 0) + return false; - if (nir_scalar_is_const(lhs) && get_loop_var(rhs.def, state)) - return true; - if (nir_scalar_is_const(rhs) && get_loop_var(lhs.def, state)) - return true; - return false; + /* The pass flags are only correct within the loop. */ + return instr->block->index >= nir_loop_first_block(state->loop)->index; } /** Calculate an estimated cost in number of instructions @@ -87,29 +85,15 @@ instr_cost(loop_info_state *state, nir_instr *instr, const nir_op_info *info = &nir_op_infos[alu->op]; unsigned cost = 1; - if (nir_op_is_selection(alu->op)) { - bool can_constant_fold = true; - for (unsigned i = 0; can_constant_fold && i < alu->def.num_components; i++) { - nir_scalar cond_scalar = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, i), 0); - can_constant_fold &= nir_is_terminator_condition_with_two_inputs(cond_scalar) && - condition_can_constant_fold(state, cond_scalar); - } + /* Check if this instruction can be constant-folded after unrolling. */ + if (is_const_after_unrolling(state, &alu->def)) + return 0; + if (nir_op_is_selection(alu->op) && is_const_after_unrolling(state, alu->src[0].src.ssa)) { /* If the condition can be constant folded after the loop is unrolled, * so can the selection. 
*/ - if (can_constant_fold) - return 0; - } else if (nir_alu_instr_is_comparison(alu) && - nir_op_infos[alu->op].num_inputs == 2) { - bool can_constant_fold = true; - for (unsigned i = 0; can_constant_fold && i < alu->def.num_components; i++) { - nir_scalar cond_scalar = nir_get_scalar(&alu->def, i); - can_constant_fold &= condition_can_constant_fold(state, cond_scalar); - } - - if (can_constant_fold) - return 0; + return 0; } else if (nir_op_is_vec_or_mov(alu->op)) { /* movs and vecs are likely free. */ return 0; @@ -1386,6 +1370,57 @@ force_unroll_heuristics(loop_info_state *state, nir_block *block) return false; } +static void +gather_constant_fold_info(loop_info_state *state, nir_instr *instr) +{ + instr->pass_flags = 0; + + /* Loop induction variables with constant initializer and constant + * update source get constant-folded when the loop is being unrolled. + */ + if (instr->type == nir_instr_type_phi && + instr->block == nir_loop_first_block(state->loop)) { + nir_loop_induction_variable *var = get_loop_var(nir_instr_def(instr), state); + + instr->pass_flags = var && nir_def_is_const(var->init_src->ssa) && + nir_def_is_const(var->update_src->src.ssa); + } + + if (instr->type != nir_instr_type_alu) + return; + + /* ALU instructions which only depend on constants and constant-foldable + * sources can also be constant-folded. + */ + nir_alu_instr *alu = nir_instr_as_alu(instr); + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if(!nir_src_is_const(alu->src[i].src) && + !is_const_after_unrolling(state, alu->src[i].src.ssa)) + return; + } + + instr->pass_flags = 1; +} + +static void +gather_unroll_heuristic_info(loop_info_state *state, const nir_shader_compiler_options *options) +{ + nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { + /* Calculate instruction cost. 
*/ + nir_foreach_instr(instr, block) { + gather_constant_fold_info(state, instr); + state->loop->info->instr_cost += instr_cost(state, instr, options); + } + + if (state->loop->info->force_unroll) + continue; + + if (force_unroll_heuristics(state, block)) { + state->loop->info->force_unroll = true; + } + } +} + static void get_loop_info(loop_info_state *state, nir_function_impl *impl) { @@ -1413,18 +1448,7 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl) impl->function->shader->info.float_controls_execution_mode, impl->function->shader->options->max_unroll_iterations); - nir_foreach_block_in_cf_node(block, &state->loop->cf_node) { - nir_foreach_instr(instr, block) { - state->loop->info->instr_cost += instr_cost(state, instr, options); - } - - if (state->loop->info->force_unroll) - continue; - - if (force_unroll_heuristics(state, block)) { - state->loop->info->force_unroll = true; - } - } + gather_unroll_heuristic_info(state, options); } static void @@ -1484,6 +1508,7 @@ nir_loop_analyze_impl(nir_function_impl *impl, bool force_unroll_sampler_indirect) { struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL); + nir_metadata_require(impl, nir_metadata_block_index); foreach_list_typed(nir_cf_node, node, node, &impl->body) process_loops(node, indirect_mask, force_unroll_sampler_indirect, range_ht); diff --git a/src/freedreno/ci/freedreno-a6xx-skips.txt b/src/freedreno/ci/freedreno-a6xx-skips.txt index c9b8564bca1..3e0efb9dd50 100644 --- a/src/freedreno/ci/freedreno-a6xx-skips.txt +++ b/src/freedreno/ci/freedreno-a6xx-skips.txt @@ -106,3 +106,7 @@ dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap_map2 dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap_map2 + +# Generates too many immediates due to loop unrolling 
+KHR-GLES31.core.geometry_shader.limits.max_output_components +KHR-GL46.geometry_shader.limits.max_output_components